• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2015, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  ubidi.c
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999jul27
16 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
17 *
18 */
19 
20 #include "cmemory.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ubidi.h"
25 #include "unicode/utf16.h"
26 #include "ubidi_props.h"
27 #include "ubidiimp.h"
28 #include "uassert.h"
29 
30 /*
31  * General implementation notes:
32  *
33  * Throughout the implementation, there are comments like (W2) that refer to
34  * rules of the BiDi algorithm, in this example to the second rule of the
35  * resolution of weak types.
36  *
37  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38  * character according to UTF-16, the second UChar gets the directional property of
39  * the entire character assigned, while the first one gets a BN, a boundary
40  * neutral, type, which is ignored by most of the algorithm according to
41  * rule (X9) and the implementation suggestions of the BiDi algorithm.
42  *
43  * Later, adjustWSLevels() will set the level for each BN to that of the
44  * following character (UChar), which results in surrogate pairs getting the
45  * same level on each of their surrogates.
46  *
47  * In a UTF-8 implementation, the same thing could be done: the last byte of
48  * a multi-byte sequence would get the "real" property, while all previous
49  * bytes of that sequence would get BN.
50  *
51  * It is not possible to assign all those parts of a character the same real
52  * property because this would fail in the resolution of weak types with rules
53  * that look at immediately surrounding types.
54  *
55  * As a related topic, this implementation does not remove Boundary Neutral
56  * types from the input, but ignores them wherever this is relevant.
57  * For example, the loop for the resolution of the weak types reads
58  * types until it finds a non-BN.
59  * Also, explicit embedding codes are neither changed into BN nor removed.
60  * They are only treated the same way real BNs are.
61  * As stated before, adjustWSLevels() takes care of them at the end.
62  * For the purpose of conformance, the levels of all these codes
63  * do not matter.
64  *
65  * Note that this implementation modifies the dirProps
66  * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67  * X6, N0 (replace paired brackets by L or R).
68  *
69  * In this implementation, the resolution of weak types (W1 to W6),
70  * neutrals (N1 and N2), and the assignment of the resolved level (In)
71  * are all done in one single loop, in resolveImplicitLevels().
72  * Changes of dirProp values are done on the fly, without writing
73  * them back to the dirProps array.
74  *
75  *
76  * This implementation contains code that allows to bypass steps of the
77  * algorithm that are not needed on the specific paragraph
78  * in order to speed up the most common cases considerably,
79  * like text that is entirely LTR, or RTL text without numbers.
80  *
81  * Most of this is done by setting a bit for each directional property
82  * in a flags variable and later checking for whether there are
83  * any LTR characters or any RTL characters, or both, whether
84  * there are any explicit embedding codes, etc.
85  *
86  * If the (Xn) steps are performed, then the flags are re-evaluated,
87  * because they will then not contain the embedding codes any more
88  * and will be adjusted for override codes, so that subsequently
89  * more bypassing may be possible than what the initial flags suggested.
90  *
91  * If the text is not mixed-directional, then the
92  * algorithm steps for the weak type resolution are not performed,
93  * and all levels are set to the paragraph level.
94  *
95  * If there are no explicit embedding codes, then the (Xn) steps
96  * are not performed.
97  *
98  * If embedding levels are supplied as a parameter, then all
99  * explicit embedding codes are ignored, and the (Xn) steps
100  * are not performed.
101  *
102  * White Space types could get the level of the run they belong to,
103  * and are checked with a test of (flags&MASK_EMBEDDING) to
104  * consider if the paragraph direction should be considered in
105  * the flags variable.
106  *
107  * If there are no White Space types in the paragraph, then
108  * (L1) is not necessary in adjustWSLevels().
109  */
110 
111 /* to avoid some conditional statements, use tiny constant arrays */
112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115 
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117 #define DIRPROP_FLAG_E(level)  flagE[(level)&1]
118 #define DIRPROP_FLAG_O(level)  flagO[(level)&1]
119 
120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121 
122 #define NO_OVERRIDE(level)  ((level)&~UBIDI_LEVEL_OVERRIDE)
123 
124 /* UBiDi object management -------------------------------------------------- */
125 
126 U_CAPI UBiDi * U_EXPORT2
ubidi_open(void)127 ubidi_open(void)
128 {
129     UErrorCode errorCode=U_ZERO_ERROR;
130     return ubidi_openSized(0, 0, &errorCode);
131 }
132 
133 U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)134 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
135     UBiDi *pBiDi;
136 
137     /* check the argument values */
138     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
139         return NULL;
140     } else if(maxLength<0 || maxRunCount<0) {
141         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
142         return NULL;    /* invalid arguments */
143     }
144 
145     /* allocate memory for the object */
146     pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
147     if(pBiDi==NULL) {
148         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
149         return NULL;
150     }
151 
152     /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153     uprv_memset(pBiDi, 0, sizeof(UBiDi));
154 
155     /* allocate memory for arrays as requested */
156     if(maxLength>0) {
157         if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
158             !getInitialLevelsMemory(pBiDi, maxLength)
159         ) {
160             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
161         }
162     } else {
163         pBiDi->mayAllocateText=TRUE;
164     }
165 
166     if(maxRunCount>0) {
167         if(maxRunCount==1) {
168             /* use simpleRuns[] */
169             pBiDi->runsSize=sizeof(Run);
170         } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
171             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
172         }
173     } else {
174         pBiDi->mayAllocateRuns=TRUE;
175     }
176 
177     if(U_SUCCESS(*pErrorCode)) {
178         return pBiDi;
179     } else {
180         ubidi_close(pBiDi);
181         return NULL;
182     }
183 }
184 
185 /*
186  * We are allowed to allocate memory if memory==NULL or
187  * mayAllocate==TRUE for each array that we need.
188  * We also try to grow memory as needed if we
189  * allocate it.
190  *
191  * Assume sizeNeeded>0.
192  * If *pMemory!=NULL, then assume *pSize>0.
193  *
194  * ### this realloc() may unnecessarily copy the old data,
195  * which we know we don't need any more;
196  * is this the best way to do this??
197  */
198 U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation * bidiMem,int32_t * pSize,UBool mayAllocate,int32_t sizeNeeded)199 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
200     void **pMemory = (void **)bidiMem;
201     /* check for existing memory */
202     if(*pMemory==NULL) {
203         /* we need to allocate memory */
204         if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
205             *pSize=sizeNeeded;
206             return TRUE;
207         } else {
208             return FALSE;
209         }
210     } else {
211         if(sizeNeeded<=*pSize) {
212             /* there is already enough memory */
213             return TRUE;
214         }
215         else if(!mayAllocate) {
216             /* not enough memory, and we must not allocate */
217             return FALSE;
218         } else {
219             /* we try to grow */
220             void *memory;
221             /* in most cases, we do not need the copy-old-data part of
222              * realloc, but it is needed when adding runs using getRunsMemory()
223              * in setParaRunsOnly()
224              */
225             if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
226                 *pMemory=memory;
227                 *pSize=sizeNeeded;
228                 return TRUE;
229             } else {
230                 /* we failed to grow */
231                 return FALSE;
232             }
233         }
234     }
235 }
236 
237 U_CAPI void U_EXPORT2
ubidi_close(UBiDi * pBiDi)238 ubidi_close(UBiDi *pBiDi) {
239     if(pBiDi!=NULL) {
240         pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
241         if(pBiDi->dirPropsMemory!=NULL) {
242             uprv_free(pBiDi->dirPropsMemory);
243         }
244         if(pBiDi->levelsMemory!=NULL) {
245             uprv_free(pBiDi->levelsMemory);
246         }
247         if(pBiDi->openingsMemory!=NULL) {
248             uprv_free(pBiDi->openingsMemory);
249         }
250         if(pBiDi->parasMemory!=NULL) {
251             uprv_free(pBiDi->parasMemory);
252         }
253         if(pBiDi->runsMemory!=NULL) {
254             uprv_free(pBiDi->runsMemory);
255         }
256         if(pBiDi->isolatesMemory!=NULL) {
257             uprv_free(pBiDi->isolatesMemory);
258         }
259         if(pBiDi->insertPoints.points!=NULL) {
260             uprv_free(pBiDi->insertPoints.points);
261         }
262 
263         uprv_free(pBiDi);
264     }
265 }
266 
267 /* set to approximate "inverse BiDi" ---------------------------------------- */
268 
269 U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi * pBiDi,UBool isInverse)270 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
271     if(pBiDi!=NULL) {
272         pBiDi->isInverse=isInverse;
273         pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
274                                           : UBIDI_REORDER_DEFAULT;
275     }
276 }
277 
278 U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi * pBiDi)279 ubidi_isInverse(UBiDi *pBiDi) {
280     if(pBiDi!=NULL) {
281         return pBiDi->isInverse;
282     } else {
283         return FALSE;
284     }
285 }
286 
287 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
288  * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
289  * concept of RUNS_ONLY which is a double operation.
290  * It could be advantageous to divide this into 3 concepts:
291  * a) Operation: direct / inverse / RUNS_ONLY
292  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
293  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
294  * This would allow combinations not possible today like RUNS_ONLY with
295  * NUMBERS_SPECIAL.
296  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
297  * REMOVE_CONTROLS for the inverse step.
298  * Not all combinations would be supported, and probably not all do make sense.
299  * This would need to document which ones are supported and what are the
300  * fallbacks for unsupported combinations.
301  */
302 U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi * pBiDi,UBiDiReorderingMode reorderingMode)303 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
304     if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
305                         && (reorderingMode < UBIDI_REORDER_COUNT)) {
306         pBiDi->reorderingMode = reorderingMode;
307         pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
308     }
309 }
310 
311 U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi * pBiDi)312 ubidi_getReorderingMode(UBiDi *pBiDi) {
313     if (pBiDi!=NULL) {
314         return pBiDi->reorderingMode;
315     } else {
316         return UBIDI_REORDER_DEFAULT;
317     }
318 }
319 
320 U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi * pBiDi,uint32_t reorderingOptions)321 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
322     if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
323         reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
324     }
325     if (pBiDi!=NULL) {
326         pBiDi->reorderingOptions=reorderingOptions;
327     }
328 }
329 
330 U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi * pBiDi)331 ubidi_getReorderingOptions(UBiDi *pBiDi) {
332     if (pBiDi!=NULL) {
333         return pBiDi->reorderingOptions;
334     } else {
335         return 0;
336     }
337 }
338 
339 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getBaseDirection(const UChar * text,int32_t length)340 ubidi_getBaseDirection(const UChar *text,
341 int32_t length){
342 
343     int32_t i;
344     UChar32 uchar;
345     UCharDirection dir;
346 
347     if( text==NULL || length<-1 ){
348         return UBIDI_NEUTRAL;
349     }
350 
351     if(length==-1) {
352         length=u_strlen(text);
353     }
354 
355     for( i = 0 ; i < length; ) {
356         /* i is incremented by U16_NEXT */
357         U16_NEXT(text, i, length, uchar);
358         dir = u_charDirection(uchar);
359         if( dir == U_LEFT_TO_RIGHT )
360                 return UBIDI_LTR;
361         if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
362                 return UBIDI_RTL;
363     }
364     return UBIDI_NEUTRAL;
365 }
366 
367 /* perform (P2)..(P3) ------------------------------------------------------- */
368 
369 /**
370  * Returns the directionality of the first strong character
371  * after the last B in prologue, if any.
372  * Requires prologue!=null.
373  */
374 static DirProp
firstL_R_AL(UBiDi * pBiDi)375 firstL_R_AL(UBiDi *pBiDi) {
376     const UChar *text=pBiDi->prologue;
377     int32_t length=pBiDi->proLength;
378     int32_t i;
379     UChar32 uchar;
380     DirProp dirProp, result=ON;
381     for(i=0; i<length; ) {
382         /* i is incremented by U16_NEXT */
383         U16_NEXT(text, i, length, uchar);
384         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
385         if(result==ON) {
386             if(dirProp==L || dirProp==R || dirProp==AL) {
387                 result=dirProp;
388             }
389         } else {
390             if(dirProp==B) {
391                 result=ON;
392             }
393         }
394     }
395     return result;
396 }
397 
398 /*
399  * Check that there are enough entries in the array pointed to by pBiDi->paras
400  */
401 static UBool
checkParaCount(UBiDi * pBiDi)402 checkParaCount(UBiDi *pBiDi) {
403     int32_t count=pBiDi->paraCount;
404     if(pBiDi->paras==pBiDi->simpleParas) {
405         if(count<=SIMPLE_PARAS_COUNT)
406             return TRUE;
407         if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
408             return FALSE;
409         pBiDi->paras=pBiDi->parasMemory;
410         uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
411         return TRUE;
412     }
413     if(!getInitialParasMemory(pBiDi, count * 2))
414         return FALSE;
415     pBiDi->paras=pBiDi->parasMemory;
416     return TRUE;
417 }
418 
419 /*
420  * Get the directional properties for the text, calculate the flags bit-set, and
421  * determine the paragraph level if necessary (in pBiDi->paras[i].level).
422  * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
423  * When encountering an FSI, it is initially replaced with an LRI, which is the
424  * default. Only if a strong R or AL is found within its scope will the LRI be
425  * replaced by an RLI.
426  */
427 static UBool
getDirProps(UBiDi * pBiDi)428 getDirProps(UBiDi *pBiDi) {
429     const UChar *text=pBiDi->text;
430     DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
431 
432     int32_t i=0, originalLength=pBiDi->originalLength;
433     Flags flags=0;      /* collect all directionalities in the text */
434     UChar32 uchar;
435     DirProp dirProp=0, defaultParaLevel=0;  /* initialize to avoid compiler warnings */
436     UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
437     /* for inverse BiDi, the default para level is set to RTL if there is a
438        strong R or AL character at either end of the text                            */
439     UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
440             (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
441              pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
442     int32_t lastArabicPos=-1;
443     int32_t controlCount=0;
444     UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
445                                        UBIDI_OPTION_REMOVE_CONTROLS);
446 
447     enum State {
448          NOT_SEEKING_STRONG,            /* 0: not contextual paraLevel, not after FSI */
449          SEEKING_STRONG_FOR_PARA,       /* 1: looking for first strong char in para */
450          SEEKING_STRONG_FOR_FSI,        /* 2: looking for first strong after FSI */
451          LOOKING_FOR_PDI                /* 3: found strong after FSI, looking for PDI */
452     };
453     State state;
454     DirProp lastStrong=ON;              /* for default level & inverse BiDi */
455     /* The following stacks are used to manage isolate sequences. Those
456        sequences may be nested, but obviously never more deeply than the
457        maximum explicit embedding level.
458        lastStack is the index of the last used entry in the stack. A value of -1
459        means that there is no open isolate sequence.
460        lastStack is reset to -1 on paragraph boundaries. */
461     /* The following stack contains the position of the initiator of
462        each open isolate sequence */
463     int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
464     /* The following stack contains the last known state before
465        encountering the initiator of an isolate sequence */
466     State  previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
467     int32_t stackLast=-1;
468 
469     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
470         pBiDi->length=0;
471     defaultParaLevel=pBiDi->paraLevel&1;
472     if(isDefaultLevel) {
473         pBiDi->paras[0].level=defaultParaLevel;
474         lastStrong=defaultParaLevel;
475         if(pBiDi->proLength>0 &&                    /* there is a prologue */
476            (dirProp=firstL_R_AL(pBiDi))!=ON) {  /* with a strong character */
477             if(dirProp==L)
478                 pBiDi->paras[0].level=0;    /* set the default para level */
479             else
480                 pBiDi->paras[0].level=1;    /* set the default para level */
481             state=NOT_SEEKING_STRONG;
482         } else {
483             state=SEEKING_STRONG_FOR_PARA;
484         }
485     } else {
486         pBiDi->paras[0].level=pBiDi->paraLevel;
487         state=NOT_SEEKING_STRONG;
488     }
489     /* count paragraphs and determine the paragraph level (P2..P3) */
490     /*
491      * see comment in ubidi.h:
492      * the UBIDI_DEFAULT_XXX values are designed so that
493      * their bit 0 alone yields the intended default
494      */
495     for( /* i=0 above */ ; i<originalLength; ) {
496         /* i is incremented by U16_NEXT */
497         U16_NEXT(text, i, originalLength, uchar);
498         flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
499         dirProps[i-1]=dirProp;
500         if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
501             flags|=DIRPROP_FLAG(BN);
502             dirProps[i-2]=BN;
503         }
504         if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
505             controlCount++;
506         if(dirProp==L) {
507             if(state==SEEKING_STRONG_FOR_PARA) {
508                 pBiDi->paras[pBiDi->paraCount-1].level=0;
509                 state=NOT_SEEKING_STRONG;
510             }
511             else if(state==SEEKING_STRONG_FOR_FSI) {
512                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
513                     /* no need for next statement, already set by default */
514                     /* dirProps[isolateStartStack[stackLast]]=LRI; */
515                     flags|=DIRPROP_FLAG(LRI);
516                 }
517                 state=LOOKING_FOR_PDI;
518             }
519             lastStrong=L;
520             continue;
521         }
522         if(dirProp==R || dirProp==AL) {
523             if(state==SEEKING_STRONG_FOR_PARA) {
524                 pBiDi->paras[pBiDi->paraCount-1].level=1;
525                 state=NOT_SEEKING_STRONG;
526             }
527             else if(state==SEEKING_STRONG_FOR_FSI) {
528                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
529                     dirProps[isolateStartStack[stackLast]]=RLI;
530                     flags|=DIRPROP_FLAG(RLI);
531                 }
532                 state=LOOKING_FOR_PDI;
533             }
534             lastStrong=R;
535             if(dirProp==AL)
536                 lastArabicPos=i-1;
537             continue;
538         }
539         if(dirProp>=FSI && dirProp<=RLI) {  /* FSI, LRI or RLI */
540             stackLast++;
541             if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
542                 isolateStartStack[stackLast]=i-1;
543                 previousStateStack[stackLast]=state;
544             }
545             if(dirProp==FSI) {
546                 dirProps[i-1]=LRI;      /* default if no strong char */
547                 state=SEEKING_STRONG_FOR_FSI;
548             }
549             else
550                 state=LOOKING_FOR_PDI;
551             continue;
552         }
553         if(dirProp==PDI) {
554             if(state==SEEKING_STRONG_FOR_FSI) {
555                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
556                     /* no need for next statement, already set by default */
557                     /* dirProps[isolateStartStack[stackLast]]=LRI; */
558                     flags|=DIRPROP_FLAG(LRI);
559                 }
560             }
561             if(stackLast>=0) {
562                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
563                     state=previousStateStack[stackLast];
564                 stackLast--;
565             }
566             continue;
567         }
568         if(dirProp==B) {
569             if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
570                 continue;
571             pBiDi->paras[pBiDi->paraCount-1].limit=i;
572             if(isDefaultLevelInverse && lastStrong==R)
573                 pBiDi->paras[pBiDi->paraCount-1].level=1;
574             if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
575                 /* When streaming, we only process whole paragraphs
576                    thus some updates are only done on paragraph boundaries */
577                 pBiDi->length=i;        /* i is index to next character */
578                 pBiDi->controlCount=controlCount;
579             }
580             if(i<originalLength) {              /* B not last char in text */
581                 pBiDi->paraCount++;
582                 if(checkParaCount(pBiDi)==FALSE)    /* not enough memory for a new para entry */
583                     return FALSE;
584                 if(isDefaultLevel) {
585                     pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
586                     state=SEEKING_STRONG_FOR_PARA;
587                     lastStrong=defaultParaLevel;
588                 } else {
589                     pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
590                     state=NOT_SEEKING_STRONG;
591                 }
592                 stackLast=-1;
593             }
594             continue;
595         }
596     }
597     /* Ignore still open isolate sequences with overflow */
598     if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
599         stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
600         state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
601     }
602     /* Resolve direction of still unresolved open FSI sequences */
603     while(stackLast>=0) {
604         if(state==SEEKING_STRONG_FOR_FSI) {
605             /* no need for next statement, already set by default */
606             /* dirProps[isolateStartStack[stackLast]]=LRI; */
607             flags|=DIRPROP_FLAG(LRI);
608             break;
609         }
610         state=previousStateStack[stackLast];
611         stackLast--;
612     }
613     /* When streaming, ignore text after the last paragraph separator */
614     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
615         if(pBiDi->length<originalLength)
616             pBiDi->paraCount--;
617     } else {
618         pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
619         pBiDi->controlCount=controlCount;
620     }
621     /* For inverse bidi, default para direction is RTL if there is
622        a strong R or AL at either end of the paragraph */
623     if(isDefaultLevelInverse && lastStrong==R) {
624         pBiDi->paras[pBiDi->paraCount-1].level=1;
625     }
626     if(isDefaultLevel) {
627         pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
628     }
629     /* The following is needed to resolve the text direction for default level
630        paragraphs containing no strong character */
631     for(i=0; i<pBiDi->paraCount; i++)
632         flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
633 
634     if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
635         flags|=DIRPROP_FLAG(L);
636     }
637     pBiDi->flags=flags;
638     pBiDi->lastArabicPos=lastArabicPos;
639     return TRUE;
640 }
641 
642 /* determine the paragraph level at position index */
643 U_CFUNC UBiDiLevel
ubidi_getParaLevelAtIndex(const UBiDi * pBiDi,int32_t pindex)644 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
645     int32_t i;
646     for(i=0; i<pBiDi->paraCount; i++)
647         if(pindex<pBiDi->paras[i].limit)
648             break;
649     if(i>=pBiDi->paraCount)
650         i=pBiDi->paraCount-1;
651     return (UBiDiLevel)(pBiDi->paras[i].level);
652 }
653 
654 /* Functions for handling paired brackets ----------------------------------- */
655 
656 /* In the isoRuns array, the first entry is used for text outside of any
657    isolate sequence.  Higher entries are used for each more deeply nested
658    isolate sequence. isoRunLast is the index of the last used entry.  The
659    openings array is used to note the data of opening brackets not yet
660    matched by a closing bracket, or matched but still susceptible to change
661    level.
662    Each isoRun entry contains the index of the first and
663    one-after-last openings entries for pending opening brackets it
664    contains.  The next openings entry to use is the one-after-last of the
665    most deeply nested isoRun entry.
666    isoRun entries also contain their current embedding level and the last
667    encountered strong character, since these will be needed to resolve
668    the level of paired brackets.  */
669 
670 static void
bracketInit(UBiDi * pBiDi,BracketData * bd)671 bracketInit(UBiDi *pBiDi, BracketData *bd) {
672     bd->pBiDi=pBiDi;
673     bd->isoRunLast=0;
674     bd->isoRuns[0].start=0;
675     bd->isoRuns[0].limit=0;
676     bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
677     UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
678     bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
679     bd->isoRuns[0].contextDir = (UBiDiDirection)t;
680     bd->isoRuns[0].contextPos=0;
681     if(pBiDi->openingsMemory) {
682         bd->openings=pBiDi->openingsMemory;
683         bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
684     } else {
685         bd->openings=bd->simpleOpenings;
686         bd->openingsCount=SIMPLE_OPENINGS_COUNT;
687     }
688     bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
689                          bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
690 }
691 
692 /* paragraph boundary */
693 static void
bracketProcessB(BracketData * bd,UBiDiLevel level)694 bracketProcessB(BracketData *bd, UBiDiLevel level) {
695     bd->isoRunLast=0;
696     bd->isoRuns[0].limit=0;
697     bd->isoRuns[0].level=level;
698     bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
699     bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1);
700     bd->isoRuns[0].contextPos=0;
701 }
702 
703 /* LRE, LRO, RLE, RLO, PDF */
704 static void
bracketProcessBoundary(BracketData * bd,int32_t lastCcPos,UBiDiLevel contextLevel,UBiDiLevel embeddingLevel)705 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
706                        UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
707     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
708     DirProp *dirProps=bd->pBiDi->dirProps;
709     if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO)  /* after an isolate */
710         return;
711     if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel))   /* not a PDF */
712         contextLevel=embeddingLevel;
713     pLastIsoRun->limit=pLastIsoRun->start;
714     pLastIsoRun->level=embeddingLevel;
715     pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
716     pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
717     pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos;
718 }
719 
720 /* LRI or RLI */
721 static void
bracketProcessLRI_RLI(BracketData * bd,UBiDiLevel level)722 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
723     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
724     int16_t lastLimit;
725     pLastIsoRun->lastBase=ON;
726     lastLimit=pLastIsoRun->limit;
727     bd->isoRunLast++;
728     pLastIsoRun++;
729     pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
730     pLastIsoRun->level=level;
731     pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
732     pLastIsoRun->contextDir=(UBiDiDirection)(level&1);
733     pLastIsoRun->contextPos=0;
734 }
735 
736 /* PDI */
737 static void
bracketProcessPDI(BracketData * bd)738 bracketProcessPDI(BracketData *bd) {
739     IsoRun *pLastIsoRun;
740     bd->isoRunLast--;
741     pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
742     pLastIsoRun->lastBase=ON;
743 }
744 
745 /* newly found opening bracket: create an openings entry */
746 static UBool                            /* return TRUE if success */
bracketAddOpening(BracketData * bd,UChar match,int32_t position)747 bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
748     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
749     Opening *pOpening;
750     if(pLastIsoRun->limit>=bd->openingsCount) {  /* no available new entry */
751         UBiDi *pBiDi=bd->pBiDi;
752         if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
753             return FALSE;
754         if(bd->openings==bd->simpleOpenings)
755             uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
756                         SIMPLE_OPENINGS_COUNT * sizeof(Opening));
757         bd->openings=pBiDi->openingsMemory;     /* may have changed */
758         bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
759     }
760     pOpening=&bd->openings[pLastIsoRun->limit];
761     pOpening->position=position;
762     pOpening->match=match;
763     pOpening->contextDir=pLastIsoRun->contextDir;
764     pOpening->contextPos=pLastIsoRun->contextPos;
765     pOpening->flags=0;
766     pLastIsoRun->limit++;
767     return TRUE;
768 }
769 
770 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
771 static void
fixN0c(BracketData * bd,int32_t openingIndex,int32_t newPropPosition,DirProp newProp)772 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
773     /* This function calls itself recursively */
774     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
775     Opening *qOpening;
776     DirProp *dirProps=bd->pBiDi->dirProps;
777     int32_t k, openingPosition, closingPosition;
778     for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
779         if(qOpening->match>=0)      /* not an N0c match */
780             continue;
781         if(newPropPosition<qOpening->contextPos)
782             break;
783         if(newPropPosition>=qOpening->position)
784             continue;
785         if(newProp==qOpening->contextDir)
786             break;
787         openingPosition=qOpening->position;
788         dirProps[openingPosition]=newProp;
789         closingPosition=-(qOpening->match);
790         dirProps[closingPosition]=newProp;
791         qOpening->match=0;                      /* prevent further changes */
792         fixN0c(bd, k, openingPosition, newProp);
793         fixN0c(bd, k, closingPosition, newProp);
794     }
795 }
796 
797 /* process closing bracket */
798 static DirProp              /* return L or R if N0b or N0c, ON if N0d */
bracketProcessClosing(BracketData * bd,int32_t openIdx,int32_t position)799 bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
800     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
801     Opening *pOpening, *qOpening;
802     UBiDiDirection direction;
803     UBool stable;
804     DirProp newProp;
805     pOpening=&bd->openings[openIdx];
806     direction=(UBiDiDirection)(pLastIsoRun->level&1);
807     stable=TRUE;            /* assume stable until proved otherwise */
808 
809     /* The stable flag is set when brackets are paired and their
810        level is resolved and cannot be changed by what will be
811        found later in the source string.
812        An unstable match can occur only when applying N0c, where
813        the resolved level depends on the preceding context, and
814        this context may be affected by text occurring later.
815        Example: RTL paragraph containing:  abc[(latin) HEBREW]
816        When the closing parenthesis is encountered, it appears
817        that N0c1 must be applied since 'abc' sets an opposite
818        direction context and both parentheses receive level 2.
819        However, when the closing square bracket is processed,
820        N0b applies because of 'HEBREW' being included within the
821        brackets, thus the square brackets are treated like R and
822        receive level 1. However, this changes the preceding
823        context of the opening parenthesis, and it now appears
824        that N0c2 must be applied to the parentheses rather than
825        N0c1. */
826 
827     if((direction==0 && pOpening->flags&FOUND_L) ||
828        (direction==1 && pOpening->flags&FOUND_R)) {                         /* N0b */
829         newProp=static_cast<DirProp>(direction);
830     }
831     else if(pOpening->flags&(FOUND_L|FOUND_R)) {                            /* N0c */
832         /* it is stable if there is no containing pair or in
833            conditions too complicated and not worth checking */
834         stable=(openIdx==pLastIsoRun->start);
835         if(direction!=pOpening->contextDir)
836             newProp= static_cast<DirProp>(pOpening->contextDir);           /* N0c1 */
837         else
838             newProp= static_cast<DirProp>(direction);                      /* N0c2 */
839     } else {
840         /* forget this and any brackets nested within this pair */
841         pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
842         return ON;                                                          /* N0d */
843     }
844     bd->pBiDi->dirProps[pOpening->position]=newProp;
845     bd->pBiDi->dirProps[position]=newProp;
846     /* Update nested N0c pairs that may be affected */
847     fixN0c(bd, openIdx, pOpening->position, newProp);
848     if(stable) {
849         pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
850         /* remove lower located synonyms if any */
851         while(pLastIsoRun->limit>pLastIsoRun->start &&
852               bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
853             pLastIsoRun->limit--;
854     } else {
855         int32_t k;
856         pOpening->match=-position;
857         /* neutralize lower located synonyms if any */
858         k=openIdx-1;
859         while(k>=pLastIsoRun->start &&
860               bd->openings[k].position==pOpening->position)
861             bd->openings[k--].match=0;
862         /* neutralize any unmatched opening between the current pair;
863            this will also neutralize higher located synonyms if any */
864         for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
865             qOpening=&bd->openings[k];
866             if(qOpening->position>=position)
867                 break;
868             if(qOpening->match>0)
869                 qOpening->match=0;
870         }
871     }
872     return newProp;
873 }
874 
875 /* handle strong characters, digits and candidates for closing brackets */
876 static UBool                            /* return TRUE if success */
bracketProcessChar(BracketData * bd,int32_t position)877 bracketProcessChar(BracketData *bd, int32_t position) {
878     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
879     DirProp *dirProps, dirProp, newProp;
880     UBiDiLevel level;
881     dirProps=bd->pBiDi->dirProps;
882     dirProp=dirProps[position];
883     if(dirProp==ON) {
884         UChar c, match;
885         int32_t idx;
886         /* First see if it is a matching closing bracket. Hopefully, this is
887            more efficient than checking if it is a closing bracket at all */
888         c=bd->pBiDi->text[position];
889         for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
890             if(bd->openings[idx].match!=c)
891                 continue;
892             /* We have a match */
893             newProp=bracketProcessClosing(bd, idx, position);
894             if(newProp==ON) {           /* N0d */
895                 c=0;        /* prevent handling as an opening */
896                 break;
897             }
898             pLastIsoRun->lastBase=ON;
899             pLastIsoRun->contextDir=(UBiDiDirection)newProp;
900             pLastIsoRun->contextPos=position;
901             level=bd->pBiDi->levels[position];
902             if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
903                 uint16_t flag;
904                 int32_t i;
905                 newProp=level&1;
906                 pLastIsoRun->lastStrong=newProp;
907                 flag=DIRPROP_FLAG(newProp);
908                 for(i=pLastIsoRun->start; i<idx; i++)
909                     bd->openings[i].flags|=flag;
910                 /* matching brackets are not overridden by LRO/RLO */
911                 bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
912             }
913             /* matching brackets are not overridden by LRO/RLO */
914             bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
915             return TRUE;
916         }
917         /* We get here only if the ON character is not a matching closing
918            bracket or it is a case of N0d */
919         /* Now see if it is an opening bracket */
920         if(c)
921             match= static_cast<UChar>(u_getBidiPairedBracket(c));    /* get the matching char */
922         else
923             match=0;
924         if(match!=c &&                  /* has a matching char */
925            ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
926             /* special case: process synonyms
927                create an opening entry for each synonym */
928             if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
929                 if(!bracketAddOpening(bd, 0x3009, position))
930                     return FALSE;
931             }
932             else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
933                 if(!bracketAddOpening(bd, 0x232A, position))
934                     return FALSE;
935             }
936             if(!bracketAddOpening(bd, match, position))
937                 return FALSE;
938         }
939     }
940     level=bd->pBiDi->levels[position];
941     if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
942         newProp=level&1;
943         if(dirProp!=S && dirProp!=WS && dirProp!=ON)
944             dirProps[position]=newProp;
945         pLastIsoRun->lastBase=newProp;
946         pLastIsoRun->lastStrong=newProp;
947         pLastIsoRun->contextDir=(UBiDiDirection)newProp;
948         pLastIsoRun->contextPos=position;
949     }
950     else if(dirProp<=R || dirProp==AL) {
951         newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
952         pLastIsoRun->lastBase=dirProp;
953         pLastIsoRun->lastStrong=dirProp;
954         pLastIsoRun->contextDir=(UBiDiDirection)newProp;
955         pLastIsoRun->contextPos=position;
956     }
957     else if(dirProp==EN) {
958         pLastIsoRun->lastBase=EN;
959         if(pLastIsoRun->lastStrong==L) {
960             newProp=L;                  /* W7 */
961             if(!bd->isNumbersSpecial)
962                 dirProps[position]=ENL;
963             pLastIsoRun->contextDir=(UBiDiDirection)L;
964             pLastIsoRun->contextPos=position;
965         }
966         else {
967             newProp=R;                  /* N0 */
968             if(pLastIsoRun->lastStrong==AL)
969                 dirProps[position]=AN;  /* W2 */
970             else
971                 dirProps[position]=ENR;
972             pLastIsoRun->contextDir=(UBiDiDirection)R;
973             pLastIsoRun->contextPos=position;
974         }
975     }
976     else if(dirProp==AN) {
977         newProp=R;                      /* N0 */
978         pLastIsoRun->lastBase=AN;
979         pLastIsoRun->contextDir=(UBiDiDirection)R;
980         pLastIsoRun->contextPos=position;
981     }
982     else if(dirProp==NSM) {
983         /* if the last real char was ON, change NSM to ON so that it
984            will stay ON even if the last real char is a bracket which
985            may be changed to L or R */
986         newProp=pLastIsoRun->lastBase;
987         if(newProp==ON)
988             dirProps[position]=newProp;
989     }
990     else {
991         newProp=dirProp;
992         pLastIsoRun->lastBase=dirProp;
993     }
994     if(newProp<=R || newProp==AL) {
995         int32_t i;
996         uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
997         for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
998             if(position>bd->openings[i].position)
999                 bd->openings[i].flags|=flag;
1000     }
1001     return TRUE;
1002 }
1003 
1004 /* perform (X1)..(X9) ------------------------------------------------------- */
1005 
1006 /* determine if the text is mixed-directional or single-directional */
1007 static UBiDiDirection
directionFromFlags(UBiDi * pBiDi)1008 directionFromFlags(UBiDi *pBiDi) {
1009     Flags flags=pBiDi->flags;
1010     /* if the text contains AN and neutrals, then some neutrals may become RTL */
1011     if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1012         return UBIDI_LTR;
1013     } else if(!(flags&MASK_LTR)) {
1014         return UBIDI_RTL;
1015     } else {
1016         return UBIDI_MIXED;
1017     }
1018 }
1019 
1020 /*
1021  * Resolve the explicit levels as specified by explicit embedding codes.
1022  * Recalculate the flags to have them reflect the real properties
1023  * after taking the explicit embeddings into account.
1024  *
1025  * The BiDi algorithm is designed to result in the same behavior whether embedding
1026  * levels are externally specified (from "styled text", supposedly the preferred
1027  * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1028  * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1029  * However, in a real implementation, the removal of these codes and their index
1030  * positions in the plain text is undesirable since it would result in
1031  * reallocated, reindexed text.
1032  * Instead, this implementation leaves the codes in there and just ignores them
1033  * in the subsequent processing.
1034  * In order to get the same reordering behavior, positions with a BN or a not-isolate
1035  * explicit embedding code just get the same level assigned as the last "real"
1036  * character.
1037  *
1038  * Some implementations, not this one, then overwrite some of these
1039  * directionality properties at "real" same-level-run boundaries by
1040  * L or R codes so that the resolution of weak types can be performed on the
1041  * entire paragraph at once instead of having to parse it once more and
1042  * perform that resolution on same-level-runs.
1043  * This limits the scope of the implicit rules in effectively
1044  * the same way as the run limits.
1045  *
1046  * Instead, this implementation does not modify these codes, except for
1047  * paired brackets whose properties (ON) may be replaced by L or R.
1048  * On one hand, the paragraph has to be scanned for same-level-runs, but
1049  * on the other hand, this saves another loop to reset these codes,
1050  * or saves making and modifying a copy of dirProps[].
1051  *
1052  *
1053  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1054  *
1055  *
1056  * Handling the stack of explicit levels (Xn):
1057  *
1058  * With the BiDi stack of explicit levels, as pushed with each
1059  * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1060  * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1061  *
1062  * In order to have a correct push-pop semantics even in the case of overflows,
1063  * overflow counters and a valid isolate counter are used as described in UAX#9
1064  * section 3.3.2 "Explicit Levels and Directions".
1065  *
1066  * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1067  *
1068  * Returns normally the direction; -1 if there was a memory shortage
1069  *
1070  */
1071 static UBiDiDirection
resolveExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)1072 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1073     DirProp *dirProps=pBiDi->dirProps;
1074     UBiDiLevel *levels=pBiDi->levels;
1075     const UChar *text=pBiDi->text;
1076 
1077     int32_t i=0, length=pBiDi->length;
1078     Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
1079     DirProp dirProp;
1080     UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1081     UBiDiDirection direction;
1082     pBiDi->isolateCount=0;
1083 
1084     if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1085 
1086     /* determine if the text is mixed-directional or single-directional */
1087     direction=directionFromFlags(pBiDi);
1088 
1089     /* we may not need to resolve any explicit levels */
1090     if((direction!=UBIDI_MIXED)) {
1091         /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1092         return direction;
1093     }
1094     if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1095         /* inverse BiDi: mixed, but all characters are at the same embedding level */
1096         /* set all levels to the paragraph level */
1097         int32_t paraIndex, start, limit;
1098         for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1099             if(paraIndex==0)
1100                 start=0;
1101             else
1102                 start=pBiDi->paras[paraIndex-1].limit;
1103             limit=pBiDi->paras[paraIndex].limit;
1104             level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1105             for(i=start; i<limit; i++)
1106                 levels[i]=level;
1107         }
1108         return direction;   /* no bracket matching for inverse BiDi */
1109     }
1110     if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1111         /* no embeddings, set all levels to the paragraph level */
1112         /* we still have to perform bracket matching */
1113         int32_t paraIndex, start, limit;
1114         BracketData bracketData;
1115         bracketInit(pBiDi, &bracketData);
1116         for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1117             if(paraIndex==0)
1118                 start=0;
1119             else
1120                 start=pBiDi->paras[paraIndex-1].limit;
1121             limit=pBiDi->paras[paraIndex].limit;
1122             level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1123             for(i=start; i<limit; i++) {
1124                 levels[i]=level;
1125                 dirProp=dirProps[i];
1126                 if(dirProp==BN)
1127                     continue;
1128                 if(dirProp==B) {
1129                     if((i+1)<length) {
1130                         if(text[i]==CR && text[i+1]==LF)
1131                             continue;   /* skip CR when followed by LF */
1132                         bracketProcessB(&bracketData, level);
1133                     }
1134                     continue;
1135                 }
1136                 if(!bracketProcessChar(&bracketData, i)) {
1137                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1138                     return UBIDI_LTR;
1139                 }
1140             }
1141         }
1142         return direction;
1143     }
1144     {
1145         /* continue to perform (Xn) */
1146 
1147         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1148         /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1149         UBiDiLevel embeddingLevel=level, newLevel;
1150         UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
1151         int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
1152 
1153         /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1154            stackLast points to its current entry. */
1155         uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1156                                                         but we need one more entry as base */
1157         uint32_t stackLast=0;
1158         int32_t overflowIsolateCount=0;
1159         int32_t overflowEmbeddingCount=0;
1160         int32_t validIsolateCount=0;
1161         BracketData bracketData;
1162         bracketInit(pBiDi, &bracketData);
1163         stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
1164 
1165         /* recalculate the flags */
1166         flags=0;
1167 
1168         for(i=0; i<length; ++i) {
1169             dirProp=dirProps[i];
1170             switch(dirProp) {
1171             case LRE:
1172             case RLE:
1173             case LRO:
1174             case RLO:
1175                 /* (X2, X3, X4, X5) */
1176                 flags|=DIRPROP_FLAG(BN);
1177                 levels[i]=previousLevel;
1178                 if (dirProp==LRE || dirProp==LRO)
1179                     /* least greater even level */
1180                     newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1181                 else
1182                     /* least greater odd level */
1183                     newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1184                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1185                                                          overflowEmbeddingCount==0) {
1186                     lastCcPos=i;
1187                     embeddingLevel=newLevel;
1188                     if(dirProp==LRO || dirProp==RLO)
1189                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1190                     stackLast++;
1191                     stack[stackLast]=embeddingLevel;
1192                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1193                        since this has already been done for newLevel which is
1194                        the source for embeddingLevel.
1195                      */
1196                 } else {
1197                     if(overflowIsolateCount==0)
1198                         overflowEmbeddingCount++;
1199                 }
1200                 break;
1201             case PDF:
1202                 /* (X7) */
1203                 flags|=DIRPROP_FLAG(BN);
1204                 levels[i]=previousLevel;
1205                 /* handle all the overflow cases first */
1206                 if(overflowIsolateCount) {
1207                     break;
1208                 }
1209                 if(overflowEmbeddingCount) {
1210                     overflowEmbeddingCount--;
1211                     break;
1212                 }
1213                 if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
1214                     lastCcPos=i;
1215                     stackLast--;
1216                     embeddingLevel=(UBiDiLevel)stack[stackLast];
1217                 }
1218                 break;
1219             case LRI:
1220             case RLI:
1221                 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1222                 levels[i]=NO_OVERRIDE(embeddingLevel);
1223                 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1224                     bracketProcessBoundary(&bracketData, lastCcPos,
1225                                            previousLevel, embeddingLevel);
1226                     flags|=DIRPROP_FLAG_MULTI_RUNS;
1227                 }
1228                 previousLevel=embeddingLevel;
1229                 /* (X5a, X5b) */
1230                 if(dirProp==LRI)
1231                     /* least greater even level */
1232                     newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1233                 else
1234                     /* least greater odd level */
1235                     newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1236                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1237                                                          overflowEmbeddingCount==0) {
1238                     flags|=DIRPROP_FLAG(dirProp);
1239                     lastCcPos=i;
1240                     validIsolateCount++;
1241                     if(validIsolateCount>pBiDi->isolateCount)
1242                         pBiDi->isolateCount=validIsolateCount;
1243                     embeddingLevel=newLevel;
1244                     /* we can increment stackLast without checking because newLevel
1245                        will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1246                     stackLast++;
1247                     stack[stackLast]=embeddingLevel+ISOLATE;
1248                     bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1249                 } else {
1250                     /* make it WS so that it is handled by adjustWSLevels() */
1251                     dirProps[i]=WS;
1252                     overflowIsolateCount++;
1253                 }
1254                 break;
1255             case PDI:
1256                 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1257                     bracketProcessBoundary(&bracketData, lastCcPos,
1258                                            previousLevel, embeddingLevel);
1259                     flags|=DIRPROP_FLAG_MULTI_RUNS;
1260                 }
1261                 /* (X6a) */
1262                 if(overflowIsolateCount) {
1263                     overflowIsolateCount--;
1264                     /* make it WS so that it is handled by adjustWSLevels() */
1265                     dirProps[i]=WS;
1266                 }
1267                 else if(validIsolateCount) {
1268                     flags|=DIRPROP_FLAG(PDI);
1269                     lastCcPos=i;
1270                     overflowEmbeddingCount=0;
1271                     while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1272                         stackLast--;                /* until the last isolate entry */
1273                     stackLast--;                    /* pop also the last isolate entry */
1274                     validIsolateCount--;
1275                     bracketProcessPDI(&bracketData);
1276                 } else
1277                     /* make it WS so that it is handled by adjustWSLevels() */
1278                     dirProps[i]=WS;
1279                 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1280                 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1281                 previousLevel=embeddingLevel;
1282                 levels[i]=NO_OVERRIDE(embeddingLevel);
1283                 break;
1284             case B:
1285                 flags|=DIRPROP_FLAG(B);
1286                 levels[i]=GET_PARALEVEL(pBiDi, i);
1287                 if((i+1)<length) {
1288                     if(text[i]==CR && text[i+1]==LF)
1289                         break;          /* skip CR when followed by LF */
1290                     overflowEmbeddingCount=overflowIsolateCount=0;
1291                     validIsolateCount=0;
1292                     stackLast=0;
1293                     previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1294                     stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1295                     bracketProcessB(&bracketData, embeddingLevel);
1296                 }
1297                 break;
1298             case BN:
1299                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1300                 /* they will get their levels set correctly in adjustWSLevels() */
1301                 levels[i]=previousLevel;
1302                 flags|=DIRPROP_FLAG(BN);
1303                 break;
1304             default:
1305                 /* all other types are normal characters and get the "real" level */
1306                 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1307                     bracketProcessBoundary(&bracketData, lastCcPos,
1308                                            previousLevel, embeddingLevel);
1309                     flags|=DIRPROP_FLAG_MULTI_RUNS;
1310                     if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1311                         flags|=DIRPROP_FLAG_O(embeddingLevel);
1312                     else
1313                         flags|=DIRPROP_FLAG_E(embeddingLevel);
1314                 }
1315                 previousLevel=embeddingLevel;
1316                 levels[i]=embeddingLevel;
1317                 if(!bracketProcessChar(&bracketData, i))
1318                     return (UBiDiDirection)-1;
1319                 /* the dirProp may have been changed in bracketProcessChar() */
1320                 flags|=DIRPROP_FLAG(dirProps[i]);
1321                 break;
1322             }
1323         }
1324         if(flags&MASK_EMBEDDING)
1325             flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1326         if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
1327             flags|=DIRPROP_FLAG(L);
1328         /* again, determine if the text is mixed-directional or single-directional */
1329         pBiDi->flags=flags;
1330         direction=directionFromFlags(pBiDi);
1331     }
1332     return direction;
1333 }
1334 
1335 /*
1336  * Use a pre-specified embedding levels array:
1337  *
1338  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1339  * ignore all explicit codes (X9),
1340  * and check all the preset levels.
1341  *
1342  * Recalculate the flags to have them reflect the real properties
1343  * after taking the explicit embeddings into account.
1344  */
1345 static UBiDiDirection
checkExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)1346 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1347     DirProp *dirProps=pBiDi->dirProps;
1348     UBiDiLevel *levels=pBiDi->levels;
1349     int32_t isolateCount=0;
1350 
1351     int32_t length=pBiDi->length;
1352     Flags flags=0;  /* collect all directionalities in the text */
1353     pBiDi->isolateCount=0;
1354 
1355     int32_t currentParaIndex = 0;
1356     int32_t currentParaLimit = pBiDi->paras[0].limit;
1357     int32_t currentParaLevel = pBiDi->paraLevel;
1358 
1359     for(int32_t i=0; i<length; ++i) {
1360         UBiDiLevel level=levels[i];
1361         DirProp dirProp=dirProps[i];
1362         if(dirProp==LRI || dirProp==RLI) {
1363             isolateCount++;
1364             if(isolateCount>pBiDi->isolateCount)
1365                 pBiDi->isolateCount=isolateCount;
1366         }
1367         else if(dirProp==PDI)
1368             isolateCount--;
1369         else if(dirProp==B)
1370             isolateCount=0;
1371 
1372         // optimized version of  int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1373         if (pBiDi->defaultParaLevel != 0 &&
1374                 i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
1375             currentParaLevel = pBiDi->paras[++currentParaIndex].level;
1376             currentParaLimit = pBiDi->paras[currentParaIndex].limit;
1377         }
1378 
1379         UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
1380         level &= ~UBIDI_LEVEL_OVERRIDE;
1381         if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
1382             if (level == 0) {
1383                 if (dirProp == B) {
1384                     // Paragraph separators are ok with explicit level 0.
1385                     // Prevents reordering of paragraphs.
1386                 } else {
1387                     // Treat explicit level 0 as a wildcard for the paragraph level.
1388                     // Avoid making the caller guess what the paragraph level would be.
1389                     level = (UBiDiLevel)currentParaLevel;
1390                     levels[i] = level | overrideFlag;
1391                 }
1392             } else {
1393                 // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1394                 /* level out of bounds */
1395                 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1396                 return UBIDI_LTR;
1397             }
1398         }
1399         if (overrideFlag != 0) {
1400             /* keep the override flag in levels[i] but adjust the flags */
1401             flags|=DIRPROP_FLAG_O(level);
1402         } else {
1403             /* set the flags */
1404             flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1405         }
1406     }
1407     if(flags&MASK_EMBEDDING)
1408         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1409     /* determine if the text is mixed-directional or single-directional */
1410     pBiDi->flags=flags;
1411     return directionFromFlags(pBiDi);
1412 }
1413 
1414 /******************************************************************
1415  The Properties state machine table
1416 *******************************************************************
1417 
1418  All table cells are 8 bits:
1419       bits 0..4:  next state
1420       bits 5..7:  action to perform (if > 0)
1421 
1422  Cells may be of format "n" where n represents the next state
1423  (except for the rightmost column).
1424  Cells may also be of format "s(x,y)" where x represents an action
1425  to perform and y represents the next state.
1426 
1427 *******************************************************************
1428  Definitions and type for properties state table
1429 *******************************************************************
1430 */
1431 #define IMPTABPROPS_COLUMNS 16
1432 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
1433 #define GET_STATEPROPS(cell) ((cell)&0x1f)
1434 #define GET_ACTIONPROPS(cell) ((cell)>>5)
1435 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
1436 
1437 static const uint8_t groupProp[] =          /* dirProp regrouped */
1438 {
1439 /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
1440     0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
1441 };
1442 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1443 
1444 /******************************************************************
1445 
1446       PROPERTIES  STATE  TABLE
1447 
1448  In table impTabProps,
1449       - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1450       - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1451       - the Res column is the reduced property assigned to a run
1452 
1453  Action 1: process current run1, init new run1
1454         2: init new run2
1455         3: process run1, process run2, init new run1
1456         4: process run1, set run1=run2, init new run2
1457 
1458  Notes:
1459   1) This table is used in resolveImplicitLevels().
1460   2) This table triggers actions when there is a change in the Bidi
1461      property of incoming characters (action 1).
1462   3) Most such property sequences are processed immediately (in
1463      fact, passed to processPropertySeq().
1464   4) However, numbers are assembled as one sequence. This means
1465      that undefined situations (like CS following digits, until
1466      it is known if the next char will be a digit) are held until
1467      following chars define them.
1468      Example: digits followed by CS, then comes another CS or ON;
1469               the digits will be processed, then the CS assigned
1470               as the start of an ON sequence (action 3).
1471   5) There are cases where more than one sequence must be
1472      processed, for instance digits followed by CS followed by L:
1473      the digits must be processed as one sequence, and the CS
1474      must be processed as an ON sequence, all this before starting
1475      assembling chars for the opening L sequence.
1476 
1477 
1478 */
1479 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1480 {
1481 /*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
1482 /* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
1483 /* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
1484 /* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
1485 /* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
1486 /* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
1487 /* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1488 /* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
1489 /* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1490 /* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1491 /* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
1492 /*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
1493 /*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
1494 /*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1495 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
1496 /*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1497 /*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
1498 /*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
1499 /*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
1500 /*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
1501 /*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
1502 /*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
1503 /*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
1504 /*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
1505 /*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
1506 };
1507 
1508 /*  we must undef macro s because the levels tables have a different
1509  *  structure (4 bits for action and 4 bits for next state.
1510  */
1511 #undef s
1512 
1513 /******************************************************************
1514  The levels state machine tables
1515 *******************************************************************
1516 
1517  All table cells are 8 bits:
1518       bits 0..3:  next state
1519       bits 4..7:  action to perform (if > 0)
1520 
1521  Cells may be of format "n" where n represents the next state
1522  (except for the rightmost column).
1523  Cells may also be of format "s(x,y)" where x represents an action
1524  to perform and y represents the next state.
1525 
1526  This format limits each table to 16 states each and to 15 actions.
1527 
1528 *******************************************************************
1529  Definitions and type for levels state tables
1530 *******************************************************************
1531 */
1532 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
1533 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
1534 #define GET_STATE(cell) ((cell)&0x0f)
1535 #define GET_ACTION(cell) ((cell)>>4)
1536 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
1537 
1538 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1539 typedef uint8_t ImpAct[];
1540 
1541 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1542  * instead of having a pair of ImpTab and a pair of ImpAct.
1543  */
1544 typedef struct ImpTabPair {
1545     const void * pImpTab[2];
1546     const void * pImpAct[2];
1547 } ImpTabPair;
1548 
1549 /******************************************************************
1550 
1551       LEVELS  STATE  TABLES
1552 
1553  In all levels state tables,
1554       - state 0 is the initial state
1555       - the Res column is the increment to add to the text level
1556         for this property sequence.
1557 
1558  The impAct arrays for each table of a pair map the local action
1559  numbers of the table to the total list of actions. For instance,
1560  action 2 in a given table corresponds to the action number which
1561  appears in entry [2] of the impAct array for that table.
1562  The first entry of all impAct arrays must be 0.
1563 
1564  Action 1: init conditional sequence
1565         2: prepend conditional sequence to current sequence
1566         3: set ON sequence to new level - 1
1567         4: init EN/AN/ON sequence
1568         5: fix EN/AN/ON sequence followed by R
1569         6: set previous level sequence to level 2
1570 
1571  Notes:
1572   1) These tables are used in processPropertySeq(). The input
1573      is property sequences as determined by resolveImplicitLevels.
1574   2) Most such property sequences are processed immediately
1575      (levels are assigned).
1576   3) However, some sequences cannot be assigned a final level till
1577      one or more following sequences are received. For instance,
1578      ON following an R sequence within an even-level paragraph.
1579      If the following sequence is R, the ON sequence will be
1580      assigned basic run level+1, and so will the R sequence.
1581   4) S is generally handled like ON, since its level will be fixed
1582      to paragraph level in adjustWSLevels().
1583 
1584 */
1585 
1586 static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
1587 /*  In this table, conditional sequences receive the lower possible level
1588     until proven otherwise.
1589 */
1590 {
1591 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1592 /* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
1593 /* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1594 /* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
1595 /* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
1596 /* 4 : R+ON       */ {     0 , s(2,1), s(3,3), s(3,3),     4 ,     4 ,     0 ,  0 },
1597 /* 5 : AN+ON      */ {     0 , s(2,1),     0 , s(3,2),     5 ,     5 ,     0 ,  0 }
1598 };
1599 static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
1600 /*  In this table, conditional sequences receive the lower possible level
1601     until proven otherwise.
1602 */
1603 {
1604 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1605 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1606 /* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1607 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1608 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
1609 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
1610 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
1611 };
1612 static const ImpAct impAct0 = {0,1,2,3,4};
1613 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1614                                            &impTabR_DEFAULT},
1615                                           {&impAct0, &impAct0}};
1616 
1617 static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
1618 /*  In this table, conditional sequences receive the lower possible level
1619     until proven otherwise.
1620 */
1621 {
1622 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1623 /* 0 : init       */ {     0 ,     2 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1624 /* 1 : L+EN/AN    */ {     0 , s(4,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1625 /* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  1 },
1626 /* 3 : R+ON       */ {     0 , s(2,2), s(3,4), s(3,4),     3 ,     3 ,     0 ,  0 },
1627 /* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
1628 };
1629 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1630                                                    &impTabR_DEFAULT},
1631                                                   {&impAct0, &impAct0}};
1632 
1633 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1634 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
1635     until proven that there is L or sor/eor on both sides. AN is handled like EN.
1636 */
1637 {
1638 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1639 /* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1640 /* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
1641 /* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
1642 /* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
1643 /* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
1644 /* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
1645 };
1646 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1647 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
1648     until proven that there is L on both sides. AN is handled like EN.
1649 */
1650 {
1651 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1652 /* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1653 /* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1654 /* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
1655 /* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
1656 /* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
1657 };
1658 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1659                         {&impTabL_GROUP_NUMBERS_WITH_R,
1660                          &impTabR_GROUP_NUMBERS_WITH_R},
1661                         {&impAct0, &impAct0}};
1662 
1663 
1664 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1665 /*  This table is identical to the Default LTR table except that EN and AN are
1666     handled like L.
1667 */
1668 {
1669 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1670 /* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
1671 /* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
1672 /* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
1673 /* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
1674 /* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
1675 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
1676 };
1677 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1678 /*  This table is identical to the Default RTL table except that EN and AN are
1679     handled like L.
1680 */
1681 {
1682 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1683 /* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1684 /* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
1685 /* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1686 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
1687 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
1688 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
1689 };
1690 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1691                         {&impTabL_INVERSE_NUMBERS_AS_L,
1692                          &impTabR_INVERSE_NUMBERS_AS_L},
1693                         {&impAct0, &impAct0}};
1694 
1695 static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
1696 /*  In this table, conditional sequences receive the lower possible level
1697     until proven otherwise.
1698 */
1699 {
1700 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1701 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1702 /* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
1703 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1704 /* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
1705 /* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
1706 /* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
1707 /* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
1708 };
1709 static const ImpAct impAct1 = {0,1,13,14};
1710 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1711  */
1712 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1713                         {&impTabL_DEFAULT,
1714                          &impTabR_INVERSE_LIKE_DIRECT},
1715                         {&impAct0, &impAct1}};
1716 
1717 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1718 /*  The case handled in this table is (visually):  R EN L
1719 */
1720 {
1721 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1722 /* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
1723 /* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
1724 /* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
1725 /* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
1726 /* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
1727 /* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
1728 /* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
1729 };
1730 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1731 /*  The cases handled in this table are (visually):  R EN L
1732                                                      R L AN L
1733 */
1734 {
1735 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1736 /* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1737 /* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
1738 /* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
1739 /* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
1740 /* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
1741 /* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
1742 /* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
1743 };
1744 static const ImpAct impAct2 = {0,1,2,5,6,7,8};
1745 static const ImpAct impAct3 = {0,1,9,10,11,12};
1746 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1747                         {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1748                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1749                         {&impAct2, &impAct3}};
1750 
1751 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1752                         {&impTabL_NUMBERS_SPECIAL,
1753                          &impTabR_INVERSE_LIKE_DIRECT},
1754                         {&impAct0, &impAct1}};
1755 
1756 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1757 /*  The case handled in this table is (visually):  R EN L
1758 */
1759 {
1760 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1761 /* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1762 /* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
1763 /* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
1764 /* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
1765 /* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
1766 };
1767 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1768                         {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1769                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1770                         {&impAct2, &impAct3}};
1771 
1772 #undef s
1773 
1774 typedef struct {
1775     const ImpTab * pImpTab;             /* level table pointer          */
1776     const ImpAct * pImpAct;             /* action map array             */
1777     int32_t startON;                    /* start of ON sequence         */
1778     int32_t startL2EN;                  /* start of level 2 sequence    */
1779     int32_t lastStrongRTL;              /* index of last found R or AL  */
1780     int32_t state;                      /* current state                */
1781     int32_t runStart;                   /* start position of the run    */
1782     UBiDiLevel runLevel;                /* run level before implicit solving */
1783 } LevState;
1784 
1785 /*------------------------------------------------------------------------*/
1786 
1787 static void
addPoint(UBiDi * pBiDi,int32_t pos,int32_t flag)1788 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1789   /* param pos:     position where to insert
1790      param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1791   */
1792 {
1793 #define FIRSTALLOC  10
1794     Point point;
1795     InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1796 
1797     if (pInsertPoints->capacity == 0)
1798     {
1799         pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
1800         if (pInsertPoints->points == NULL)
1801         {
1802             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1803             return;
1804         }
1805         pInsertPoints->capacity=FIRSTALLOC;
1806     }
1807     if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1808     {
1809         Point * savePoints=pInsertPoints->points;
1810         pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
1811                                            pInsertPoints->capacity*2*sizeof(Point)));
1812         if (pInsertPoints->points == NULL)
1813         {
1814             pInsertPoints->points=savePoints;
1815             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1816             return;
1817         }
1818         else  pInsertPoints->capacity*=2;
1819     }
1820     point.pos=pos;
1821     point.flag=flag;
1822     pInsertPoints->points[pInsertPoints->size]=point;
1823     pInsertPoints->size++;
1824 #undef FIRSTALLOC
1825 }
1826 
1827 static void
setLevelsOutsideIsolates(UBiDi * pBiDi,int32_t start,int32_t limit,UBiDiLevel level)1828 setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
1829 {
1830     DirProp *dirProps=pBiDi->dirProps, dirProp;
1831     UBiDiLevel *levels=pBiDi->levels;
1832     int32_t isolateCount=0, k;
1833     for(k=start; k<limit; k++) {
1834         dirProp=dirProps[k];
1835         if(dirProp==PDI)
1836             isolateCount--;
1837         if(isolateCount==0)
1838             levels[k]=level;
1839         if(dirProp==LRI || dirProp==RLI)
1840             isolateCount++;
1841     }
1842 }
1843 
1844 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1845 
1846 /*
1847  * This implementation of the (Wn) rules applies all rules in one pass.
1848  * In order to do so, it needs a look-ahead of typically 1 character
1849  * (except for W5: sequences of ET) and keeps track of changes
1850  * in a rule Wp that affect a later Wq (p<q).
1851  *
1852  * The (Nn) and (In) rules are also performed in that same single loop,
1853  * but effectively one iteration behind for white space.
1854  *
1855  * Since all implicit rules are performed in one step, it is not necessary
1856  * to actually store the intermediate directional properties in dirProps[].
1857  */
1858 
1859 static void
processPropertySeq(UBiDi * pBiDi,LevState * pLevState,uint8_t _prop,int32_t start,int32_t limit)1860 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1861                    int32_t start, int32_t limit) {
1862     uint8_t cell, oldStateSeq, actionSeq;
1863     const ImpTab * pImpTab=pLevState->pImpTab;
1864     const ImpAct * pImpAct=pLevState->pImpAct;
1865     UBiDiLevel * levels=pBiDi->levels;
1866     UBiDiLevel level, addLevel;
1867     InsertPoints * pInsertPoints;
1868     int32_t start0, k;
1869 
1870     start0=start;                           /* save original start position */
1871     oldStateSeq=(uint8_t)pLevState->state;
1872     cell=(*pImpTab)[oldStateSeq][_prop];
1873     pLevState->state=GET_STATE(cell);       /* isolate the new state */
1874     actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1875     addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1876 
1877     if(actionSeq) {
1878         switch(actionSeq) {
1879         case 1:                         /* init ON seq */
1880             pLevState->startON=start0;
1881             break;
1882 
1883         case 2:                         /* prepend ON seq to current seq */
1884             start=pLevState->startON;
1885             break;
1886 
1887         case 3:                         /* EN/AN after R+ON */
1888             level=pLevState->runLevel+1;
1889             setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1890             break;
1891 
1892         case 4:                         /* EN/AN before R for NUMBERS_SPECIAL */
1893             level=pLevState->runLevel+2;
1894             setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1895             break;
1896 
1897         case 5:                         /* L or S after possible relevant EN/AN */
1898             /* check if we had EN after R/AL */
1899             if (pLevState->startL2EN >= 0) {
1900                 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1901             }
1902             pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1903             /* check if we had any relevant EN/AN after R/AL */
1904             pInsertPoints=&(pBiDi->insertPoints);
1905             if ((pInsertPoints->capacity == 0) ||
1906                 (pInsertPoints->size <= pInsertPoints->confirmed))
1907             {
1908                 /* nothing, just clean up */
1909                 pLevState->lastStrongRTL=-1;
1910                 /* check if we have a pending conditional segment */
1911                 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1912                 if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1913                     start=pLevState->startON;   /* reset to basic run level */
1914                 }
1915                 if (_prop == DirProp_S)                /* add LRM before S */
1916                 {
1917                     addPoint(pBiDi, start0, LRM_BEFORE);
1918                     pInsertPoints->confirmed=pInsertPoints->size;
1919                 }
1920                 break;
1921             }
1922             /* reset previous RTL cont to level for LTR text */
1923             for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1924             {
1925                 /* reset odd level, leave runLevel+2 as is */
1926                 levels[k]=(levels[k] - 2) & ~1;
1927             }
1928             /* mark insert points as confirmed */
1929             pInsertPoints->confirmed=pInsertPoints->size;
1930             pLevState->lastStrongRTL=-1;
1931             if (_prop == DirProp_S)            /* add LRM before S */
1932             {
1933                 addPoint(pBiDi, start0, LRM_BEFORE);
1934                 pInsertPoints->confirmed=pInsertPoints->size;
1935             }
1936             break;
1937 
1938         case 6:                         /* R/AL after possible relevant EN/AN */
1939             /* just clean up */
1940             pInsertPoints=&(pBiDi->insertPoints);
1941             if (pInsertPoints->capacity > 0)
1942                 /* remove all non confirmed insert points */
1943                 pInsertPoints->size=pInsertPoints->confirmed;
1944             pLevState->startON=-1;
1945             pLevState->startL2EN=-1;
1946             pLevState->lastStrongRTL=limit - 1;
1947             break;
1948 
1949         case 7:                         /* EN/AN after R/AL + possible cont */
1950             /* check for real AN */
1951             if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1952                 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1953             {
1954                 /* real AN */
1955                 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1956                 {
1957                     /* just note the righmost digit as a strong RTL */
1958                     pLevState->lastStrongRTL=limit - 1;
1959                     break;
1960                 }
1961                 if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1962                 {
1963                     addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1964                     pLevState->startL2EN=-2;
1965                 }
1966                 /* note AN */
1967                 addPoint(pBiDi, start0, LRM_BEFORE);
1968                 break;
1969             }
1970             /* if first EN/AN after R/AL */
1971             if (pLevState->startL2EN == -1) {
1972                 pLevState->startL2EN=start0;
1973             }
1974             break;
1975 
1976         case 8:                         /* note location of latest R/AL */
1977             pLevState->lastStrongRTL=limit - 1;
1978             pLevState->startON=-1;
1979             break;
1980 
1981         case 9:                         /* L after R+ON/EN/AN */
1982             /* include possible adjacent number on the left */
1983             for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1984             if(k>=0) {
1985                 addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1986                 pInsertPoints=&(pBiDi->insertPoints);
1987                 pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1988             }
1989             pLevState->startON=start0;
1990             break;
1991 
1992         case 10:                        /* AN after L */
1993             /* AN numbers between L text on both sides may be trouble. */
1994             /* tentatively bracket with LRMs; will be confirmed if followed by L */
1995             addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
1996             addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
1997             break;
1998 
1999         case 11:                        /* R after L+ON/EN/AN */
2000             /* false alert, infirm LRMs around previous AN */
2001             pInsertPoints=&(pBiDi->insertPoints);
2002             pInsertPoints->size=pInsertPoints->confirmed;
2003             if (_prop == DirProp_S)            /* add RLM before S */
2004             {
2005                 addPoint(pBiDi, start0, RLM_BEFORE);
2006                 pInsertPoints->confirmed=pInsertPoints->size;
2007             }
2008             break;
2009 
2010         case 12:                        /* L after L+ON/AN */
2011             level=pLevState->runLevel + addLevel;
2012             for(k=pLevState->startON; k<start0; k++) {
2013                 if (levels[k]<level)
2014                     levels[k]=level;
2015             }
2016             pInsertPoints=&(pBiDi->insertPoints);
2017             pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
2018             pLevState->startON=start0;
2019             break;
2020 
2021         case 13:                        /* L after L+ON+EN/AN/ON */
2022             level=pLevState->runLevel;
2023             for(k=start0-1; k>=pLevState->startON; k--) {
2024                 if(levels[k]==level+3) {
2025                     while(levels[k]==level+3) {
2026                         levels[k--]-=2;
2027                     }
2028                     while(levels[k]==level) {
2029                         k--;
2030                     }
2031                 }
2032                 if(levels[k]==level+2) {
2033                     levels[k]=level;
2034                     continue;
2035                 }
2036                 levels[k]=level+1;
2037             }
2038             break;
2039 
2040         case 14:                        /* R after L+ON+EN/AN/ON */
2041             level=pLevState->runLevel+1;
2042             for(k=start0-1; k>=pLevState->startON; k--) {
2043                 if(levels[k]>level) {
2044                     levels[k]-=2;
2045                 }
2046             }
2047             break;
2048 
2049         default:                        /* we should never get here */
2050             U_ASSERT(FALSE);
2051             break;
2052         }
2053     }
2054     if((addLevel) || (start < start0)) {
2055         level=pLevState->runLevel + addLevel;
2056         if(start>=pLevState->runStart) {
2057             for(k=start; k<limit; k++) {
2058                 levels[k]=level;
2059             }
2060         } else {
2061             setLevelsOutsideIsolates(pBiDi, start, limit, level);
2062         }
2063     }
2064 }
2065 
2066 /**
2067  * Returns the directionality of the last strong character at the end of the prologue, if any.
2068  * Requires prologue!=null.
2069  */
2070 static DirProp
lastL_R_AL(UBiDi * pBiDi)2071 lastL_R_AL(UBiDi *pBiDi) {
2072     const UChar *text=pBiDi->prologue;
2073     int32_t length=pBiDi->proLength;
2074     int32_t i;
2075     UChar32 uchar;
2076     DirProp dirProp;
2077     for(i=length; i>0; ) {
2078         /* i is decremented by U16_PREV */
2079         U16_PREV(text, 0, i, uchar);
2080         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2081         if(dirProp==L) {
2082             return DirProp_L;
2083         }
2084         if(dirProp==R || dirProp==AL) {
2085             return DirProp_R;
2086         }
2087         if(dirProp==B) {
2088             return DirProp_ON;
2089         }
2090     }
2091     return DirProp_ON;
2092 }
2093 
2094 /**
2095  * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2096  * Requires epilogue!=null.
2097  */
2098 static DirProp
firstL_R_AL_EN_AN(UBiDi * pBiDi)2099 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2100     const UChar *text=pBiDi->epilogue;
2101     int32_t length=pBiDi->epiLength;
2102     int32_t i;
2103     UChar32 uchar;
2104     DirProp dirProp;
2105     for(i=0; i<length; ) {
2106         /* i is incremented by U16_NEXT */
2107         U16_NEXT(text, i, length, uchar);
2108         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2109         if(dirProp==L) {
2110             return DirProp_L;
2111         }
2112         if(dirProp==R || dirProp==AL) {
2113             return DirProp_R;
2114         }
2115         if(dirProp==EN) {
2116             return DirProp_EN;
2117         }
2118         if(dirProp==AN) {
2119             return DirProp_AN;
2120         }
2121     }
2122     return DirProp_ON;
2123 }
2124 
2125 static void
resolveImplicitLevels(UBiDi * pBiDi,int32_t start,int32_t limit,DirProp sor,DirProp eor)2126 resolveImplicitLevels(UBiDi *pBiDi,
2127                       int32_t start, int32_t limit,
2128                       DirProp sor, DirProp eor) {
2129     const DirProp *dirProps=pBiDi->dirProps;
2130     DirProp dirProp;
2131     LevState levState;
2132     int32_t i, start1, start2;
2133     uint16_t oldStateImp, stateImp, actionImp;
2134     uint8_t gprop, resProp, cell;
2135     UBool inverseRTL;
2136     DirProp nextStrongProp=R;
2137     int32_t nextStrongPos=-1;
2138 
2139     /* check for RTL inverse BiDi mode */
2140     /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2141      * loop on the text characters from end to start.
2142      * This would need a different properties state table (at least different
2143      * actions) and different levels state tables (maybe very similar to the
2144      * LTR corresponding ones.
2145      */
2146     inverseRTL=(UBool)
2147         ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2148          (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
2149           pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2150 
2151     /* initialize for property and levels state tables */
2152     levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2153     levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2154     levState.runStart=start;
2155     levState.runLevel=pBiDi->levels[start];
2156     levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2157     levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2158     if(start==0 && pBiDi->proLength>0) {
2159         DirProp lastStrong=lastL_R_AL(pBiDi);
2160         if(lastStrong!=DirProp_ON) {
2161             sor=lastStrong;
2162         }
2163     }
2164     /* The isolates[] entries contain enough information to
2165        resume the bidi algorithm in the same state as it was
2166        when it was interrupted by an isolate sequence. */
2167     if(dirProps[start]==PDI  && pBiDi->isolateCount >= 0) {
2168         levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2169         start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2170         stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2171         levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2172         pBiDi->isolateCount--;
2173     } else {
2174         levState.startON=-1;
2175         start1=start;
2176         if(dirProps[start]==NSM)
2177             stateImp = 1 + sor;
2178         else
2179             stateImp=0;
2180         levState.state=0;
2181         processPropertySeq(pBiDi, &levState, sor, start, start);
2182     }
2183     start2=start;                       /* to make Java compiler happy */
2184 
2185     for(i=start; i<=limit; i++) {
2186         if(i>=limit) {
2187             int32_t k;
2188             for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
2189             dirProp=dirProps[k];
2190             if(dirProp==LRI || dirProp==RLI)
2191                 break;      /* no forced closing for sequence ending with LRI/RLI */
2192             gprop=eor;
2193         } else {
2194             DirProp prop, prop1;
2195             prop=dirProps[i];
2196             if(prop==B) {
2197                 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2198             }
2199             if(inverseRTL) {
2200                 if(prop==AL) {
2201                     /* AL before EN does not make it AN */
2202                     prop=R;
2203                 } else if(prop==EN) {
2204                     if(nextStrongPos<=i) {
2205                         /* look for next strong char (L/R/AL) */
2206                         int32_t j;
2207                         nextStrongProp=R;   /* set default */
2208                         nextStrongPos=limit;
2209                         for(j=i+1; j<limit; j++) {
2210                             prop1=dirProps[j];
2211                             if(prop1==L || prop1==R || prop1==AL) {
2212                                 nextStrongProp=prop1;
2213                                 nextStrongPos=j;
2214                                 break;
2215                             }
2216                         }
2217                     }
2218                     if(nextStrongProp==AL) {
2219                         prop=AN;
2220                     }
2221                 }
2222             }
2223             gprop=groupProp[prop];
2224         }
2225         oldStateImp=stateImp;
2226         cell=impTabProps[oldStateImp][gprop];
2227         stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
2228         actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
2229         if((i==limit) && (actionImp==0)) {
2230             /* there is an unprocessed sequence if its property == eor   */
2231             actionImp=1;                    /* process the last sequence */
2232         }
2233         if(actionImp) {
2234             resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2235             switch(actionImp) {
2236             case 1:             /* process current seq1, init new seq1 */
2237                 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2238                 start1=i;
2239                 break;
2240             case 2:             /* init new seq2 */
2241                 start2=i;
2242                 break;
2243             case 3:             /* process seq1, process seq2, init new seq1 */
2244                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2245                 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2246                 start1=i;
2247                 break;
2248             case 4:             /* process seq1, set seq1=seq2, init new seq2 */
2249                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2250                 start1=start2;
2251                 start2=i;
2252                 break;
2253             default:            /* we should never get here */
2254                 U_ASSERT(FALSE);
2255                 break;
2256             }
2257         }
2258     }
2259 
2260     /* flush possible pending sequence, e.g. ON */
2261     if(limit==pBiDi->length && pBiDi->epiLength>0) {
2262         DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2263         if(firstStrong!=DirProp_ON) {
2264             eor=firstStrong;
2265         }
2266     }
2267 
2268     /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2269     for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
2270     dirProp=dirProps[i];
2271     if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2272         pBiDi->isolateCount++;
2273         pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2274         pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2275         pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2276         pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2277     }
2278     else
2279         processPropertySeq(pBiDi, &levState, eor, limit, limit);
2280 }
2281 
2282 /* perform (L1) and (X9) ---------------------------------------------------- */
2283 
2284 /*
2285  * Reset the embedding levels for some non-graphic characters (L1).
2286  * This function also sets appropriate levels for BN, and
2287  * explicit embedding types that are supposed to have been removed
2288  * from the paragraph in (X9).
2289  */
2290 static void
adjustWSLevels(UBiDi * pBiDi)2291 adjustWSLevels(UBiDi *pBiDi) {
2292     const DirProp *dirProps=pBiDi->dirProps;
2293     UBiDiLevel *levels=pBiDi->levels;
2294     int32_t i;
2295 
2296     if(pBiDi->flags&MASK_WS) {
2297         UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2298         Flags flag;
2299 
2300         i=pBiDi->trailingWSStart;
2301         while(i>0) {
2302             /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2303             while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
2304                 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2305                     levels[i]=0;
2306                 } else {
2307                     levels[i]=GET_PARALEVEL(pBiDi, i);
2308                 }
2309             }
2310 
2311             /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2312             /* here, i+1 is guaranteed to be <length */
2313             while(i>0) {
2314                 flag=DIRPROP_FLAG(dirProps[--i]);
2315                 if(flag&MASK_BN_EXPLICIT) {
2316                     levels[i]=levels[i+1];
2317                 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2318                     levels[i]=0;
2319                     break;
2320                 } else if(flag&MASK_B_S) {
2321                     levels[i]=GET_PARALEVEL(pBiDi, i);
2322                     break;
2323                 }
2324             }
2325         }
2326     }
2327 }
2328 
2329 U_CAPI void U_EXPORT2
ubidi_setContext(UBiDi * pBiDi,const UChar * prologue,int32_t proLength,const UChar * epilogue,int32_t epiLength,UErrorCode * pErrorCode)2330 ubidi_setContext(UBiDi *pBiDi,
2331                  const UChar *prologue, int32_t proLength,
2332                  const UChar *epilogue, int32_t epiLength,
2333                  UErrorCode *pErrorCode) {
2334     /* check the argument values */
2335     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2336     if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2337        (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2338         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2339         return;
2340     }
2341 
2342     if(proLength==-1) {
2343         pBiDi->proLength=u_strlen(prologue);
2344     } else {
2345         pBiDi->proLength=proLength;
2346     }
2347     if(epiLength==-1) {
2348         pBiDi->epiLength=u_strlen(epilogue);
2349     } else {
2350         pBiDi->epiLength=epiLength;
2351     }
2352     pBiDi->prologue=prologue;
2353     pBiDi->epilogue=epilogue;
2354 }
2355 
2356 static void
setParaSuccess(UBiDi * pBiDi)2357 setParaSuccess(UBiDi *pBiDi) {
2358     pBiDi->proLength=0;                 /* forget the last context */
2359     pBiDi->epiLength=0;
2360     pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
2361 }
2362 
2363 #define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
2364 #define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
2365 
2366 static void
setParaRunsOnly(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UErrorCode * pErrorCode)2367 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2368                 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2369     int32_t *runsOnlyMemory = NULL;
2370     int32_t *visualMap;
2371     UChar *visualText;
2372     int32_t saveLength, saveTrailingWSStart;
2373     const UBiDiLevel *levels;
2374     UBiDiLevel *saveLevels;
2375     UBiDiDirection saveDirection;
2376     UBool saveMayAllocateText;
2377     Run *runs;
2378     int32_t visualLength, i, j, visualStart, logicalStart,
2379             runCount, runLength, addedRuns, insertRemove,
2380             start, limit, step, indexOddBit, logicalPos,
2381             index0, index1;
2382     uint32_t saveOptions;
2383 
2384     pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2385     if(length==0) {
2386         ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2387         goto cleanup3;
2388     }
2389     /* obtain memory for mapping table and visual text */
2390     runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
2391     if(runsOnlyMemory==NULL) {
2392         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2393         goto cleanup3;
2394     }
2395     visualMap=runsOnlyMemory;
2396     visualText=(UChar *)&visualMap[length];
2397     saveLevels=(UBiDiLevel *)&visualText[length];
2398     saveOptions=pBiDi->reorderingOptions;
2399     if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2400         pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2401         pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2402     }
2403     paraLevel&=1;                       /* accept only 0 or 1 */
2404     ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2405     if(U_FAILURE(*pErrorCode)) {
2406         goto cleanup3;
2407     }
2408     /* we cannot access directly pBiDi->levels since it is not yet set if
2409      * direction is not MIXED
2410      */
2411     levels=ubidi_getLevels(pBiDi, pErrorCode);
2412     uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
2413     saveTrailingWSStart=pBiDi->trailingWSStart;
2414     saveLength=pBiDi->length;
2415     saveDirection=pBiDi->direction;
2416 
2417     /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2418      * the visual map and the dirProps array to drive the second call
2419      * to ubidi_setPara (but must make provision for possible removal of
2420      * BiDi controls.  Alternatively, only use the dirProps array via
2421      * customized classifier callback.
2422      */
2423     visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2424                                       UBIDI_DO_MIRRORING, pErrorCode);
2425     ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2426     if(U_FAILURE(*pErrorCode)) {
2427         goto cleanup2;
2428     }
2429     pBiDi->reorderingOptions=saveOptions;
2430 
2431     pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2432     paraLevel^=1;
2433     /* Because what we did with reorderingOptions, visualText may be shorter
2434      * than the original text. But we don't want the levels memory to be
2435      * reallocated shorter than the original length, since we need to restore
2436      * the levels as after the first call to ubidi_setpara() before returning.
2437      * We will force mayAllocateText to FALSE before the second call to
2438      * ubidi_setpara(), and will restore it afterwards.
2439      */
2440     saveMayAllocateText=pBiDi->mayAllocateText;
2441     pBiDi->mayAllocateText=FALSE;
2442     ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2443     pBiDi->mayAllocateText=saveMayAllocateText;
2444     ubidi_getRuns(pBiDi, pErrorCode);
2445     if(U_FAILURE(*pErrorCode)) {
2446         goto cleanup1;
2447     }
2448     /* check if some runs must be split, count how many splits */
2449     addedRuns=0;
2450     runCount=pBiDi->runCount;
2451     runs=pBiDi->runs;
2452     visualStart=0;
2453     for(i=0; i<runCount; i++, visualStart+=runLength) {
2454         runLength=runs[i].visualLimit-visualStart;
2455         if(runLength<2) {
2456             continue;
2457         }
2458         logicalStart=GET_INDEX(runs[i].logicalStart);
2459         for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2460             index0=visualMap[j];
2461             index1=visualMap[j-1];
2462             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2463                 addedRuns++;
2464             }
2465         }
2466     }
2467     if(addedRuns) {
2468         if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2469             if(runCount==1) {
2470                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2471                 pBiDi->runsMemory[0]=runs[0];
2472             }
2473             runs=pBiDi->runs=pBiDi->runsMemory;
2474             pBiDi->runCount+=addedRuns;
2475         } else {
2476             goto cleanup1;
2477         }
2478     }
2479     /* split runs which are not consecutive in source text */
2480     for(i=runCount-1; i>=0; i--) {
2481         runLength= i==0 ? runs[0].visualLimit :
2482                           runs[i].visualLimit-runs[i-1].visualLimit;
2483         logicalStart=runs[i].logicalStart;
2484         indexOddBit=GET_ODD_BIT(logicalStart);
2485         logicalStart=GET_INDEX(logicalStart);
2486         if(runLength<2) {
2487             if(addedRuns) {
2488                 runs[i+addedRuns]=runs[i];
2489             }
2490             logicalPos=visualMap[logicalStart];
2491             runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2492                                             saveLevels[logicalPos]^indexOddBit);
2493             continue;
2494         }
2495         if(indexOddBit) {
2496             start=logicalStart;
2497             limit=logicalStart+runLength-1;
2498             step=1;
2499         } else {
2500             start=logicalStart+runLength-1;
2501             limit=logicalStart;
2502             step=-1;
2503         }
2504         for(j=start; j!=limit; j+=step) {
2505             index0=visualMap[j];
2506             index1=visualMap[j+step];
2507             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2508                 logicalPos=BIDI_MIN(visualMap[start], index0);
2509                 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2510                                             saveLevels[logicalPos]^indexOddBit);
2511                 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2512                 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2513                 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2514                 runs[i+addedRuns].insertRemove=insertRemove;
2515                 runs[i].insertRemove&=~insertRemove;
2516                 start=j+step;
2517                 addedRuns--;
2518             }
2519         }
2520         if(addedRuns) {
2521             runs[i+addedRuns]=runs[i];
2522         }
2523         logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2524         runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2525                                             saveLevels[logicalPos]^indexOddBit);
2526     }
2527 
2528   cleanup1:
2529     /* restore initial paraLevel */
2530     pBiDi->paraLevel^=1;
2531   cleanup2:
2532     /* restore real text */
2533     pBiDi->text=text;
2534     pBiDi->length=saveLength;
2535     pBiDi->originalLength=length;
2536     pBiDi->direction=saveDirection;
2537     /* the saved levels should never excess levelsSize, but we check anyway */
2538     if(saveLength>pBiDi->levelsSize) {
2539         saveLength=pBiDi->levelsSize;
2540     }
2541     uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
2542     pBiDi->trailingWSStart=saveTrailingWSStart;
2543     if(pBiDi->runCount>1) {
2544         pBiDi->direction=UBIDI_MIXED;
2545     }
2546   cleanup3:
2547     /* free memory for mapping table and visual text */
2548     uprv_free(runsOnlyMemory);
2549 
2550     pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2551 }
2552 
2553 /* ubidi_setPara ------------------------------------------------------------ */
2554 
2555 U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * pErrorCode)2556 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2557               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2558               UErrorCode *pErrorCode) {
2559     UBiDiDirection direction;
2560     DirProp *dirProps;
2561 
2562     /* check the argument values */
2563     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2564     if(pBiDi==NULL || text==NULL || length<-1 ||
2565        (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2566         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2567         return;
2568     }
2569 
2570     if(length==-1) {
2571         length=u_strlen(text);
2572     }
2573 
2574     /* special treatment for RUNS_ONLY mode */
2575     if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2576         setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2577         return;
2578     }
2579 
2580     /* initialize the UBiDi structure */
2581     pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
2582     pBiDi->text=text;
2583     pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2584     pBiDi->paraLevel=paraLevel;
2585     pBiDi->direction=(UBiDiDirection)(paraLevel&1);
2586     pBiDi->paraCount=1;
2587 
2588     pBiDi->dirProps=NULL;
2589     pBiDi->levels=NULL;
2590     pBiDi->runs=NULL;
2591     pBiDi->insertPoints.size=0;         /* clean up from last call */
2592     pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
2593 
2594     /*
2595      * Save the original paraLevel if contextual; otherwise, set to 0.
2596      */
2597     pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2598 
2599     if(length==0) {
2600         /*
2601          * For an empty paragraph, create a UBiDi object with the paraLevel and
2602          * the flags and the direction set but without allocating zero-length arrays.
2603          * There is nothing more to do.
2604          */
2605         if(IS_DEFAULT_LEVEL(paraLevel)) {
2606             pBiDi->paraLevel&=1;
2607             pBiDi->defaultParaLevel=0;
2608         }
2609         pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2610         pBiDi->runCount=0;
2611         pBiDi->paraCount=0;
2612         setParaSuccess(pBiDi);          /* mark successful setPara */
2613         return;
2614     }
2615 
2616     pBiDi->runCount=-1;
2617 
2618     /* allocate paras memory */
2619     if(pBiDi->parasMemory)
2620         pBiDi->paras=pBiDi->parasMemory;
2621     else
2622         pBiDi->paras=pBiDi->simpleParas;
2623 
2624     /*
2625      * Get the directional properties,
2626      * the flags bit-set, and
2627      * determine the paragraph level if necessary.
2628      */
2629     if(getDirPropsMemory(pBiDi, length)) {
2630         pBiDi->dirProps=pBiDi->dirPropsMemory;
2631         if(!getDirProps(pBiDi)) {
2632             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2633             return;
2634         }
2635     } else {
2636         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2637         return;
2638     }
2639     dirProps=pBiDi->dirProps;
2640     /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2641     length= pBiDi->length;
2642     pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
2643 
2644     /* are explicit levels specified? */
2645     if(embeddingLevels==NULL) {
2646         /* no: determine explicit levels according to the (Xn) rules */\
2647         if(getLevelsMemory(pBiDi, length)) {
2648             pBiDi->levels=pBiDi->levelsMemory;
2649             direction=resolveExplicitLevels(pBiDi, pErrorCode);
2650             if(U_FAILURE(*pErrorCode)) {
2651                 return;
2652             }
2653         } else {
2654             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2655             return;
2656         }
2657     } else {
2658         /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2659         pBiDi->levels=embeddingLevels;
2660         direction=checkExplicitLevels(pBiDi, pErrorCode);
2661         if(U_FAILURE(*pErrorCode)) {
2662             return;
2663         }
2664     }
2665 
2666     /* allocate isolate memory */
2667     if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
2668         pBiDi->isolates=pBiDi->simpleIsolates;
2669     else
2670         if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2671             pBiDi->isolates=pBiDi->isolatesMemory;
2672         else {
2673             if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2674                 pBiDi->isolates=pBiDi->isolatesMemory;
2675             } else {
2676                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2677                 return;
2678             }
2679         }
2680     pBiDi->isolateCount=-1;             /* current isolates stack entry == none */
2681 
2682     /*
2683      * The steps after (X9) in the UBiDi algorithm are performed only if
2684      * the paragraph text has mixed directionality!
2685      */
2686     pBiDi->direction=direction;
2687     switch(direction) {
2688     case UBIDI_LTR:
2689         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2690         pBiDi->trailingWSStart=0;
2691         break;
2692     case UBIDI_RTL:
2693         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2694         pBiDi->trailingWSStart=0;
2695         break;
2696     default:
2697         /*
2698          *  Choose the right implicit state table
2699          */
2700         switch(pBiDi->reorderingMode) {
2701         case UBIDI_REORDER_DEFAULT:
2702             pBiDi->pImpTabPair=&impTab_DEFAULT;
2703             break;
2704         case UBIDI_REORDER_NUMBERS_SPECIAL:
2705             pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2706             break;
2707         case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2708             pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2709             break;
2710         case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2711             pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2712             break;
2713         case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2714             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2715                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2716             } else {
2717                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2718             }
2719             break;
2720         case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2721             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2722                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2723             } else {
2724                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2725             }
2726             break;
2727         default:
2728             /* we should never get here */
2729             U_ASSERT(FALSE);
2730             break;
2731         }
2732         /*
2733          * If there are no external levels specified and there
2734          * are no significant explicit level codes in the text,
2735          * then we can treat the entire paragraph as one run.
2736          * Otherwise, we need to perform the following rules on runs of
2737          * the text with the same embedding levels. (X10)
2738          * "Significant" explicit level codes are ones that actually
2739          * affect non-BN characters.
2740          * Examples for "insignificant" ones are empty embeddings
2741          * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2742          */
2743         if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2744                                    !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2745             resolveImplicitLevels(pBiDi, 0, length,
2746                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2747                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2748         } else {
2749             /* sor, eor: start and end types of same-level-run */
2750             UBiDiLevel *levels=pBiDi->levels;
2751             int32_t start, limit=0;
2752             UBiDiLevel level, nextLevel;
2753             DirProp sor, eor;
2754 
2755             /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2756             level=GET_PARALEVEL(pBiDi, 0);
2757             nextLevel=levels[0];
2758             if(level<nextLevel) {
2759                 eor=GET_LR_FROM_LEVEL(nextLevel);
2760             } else {
2761                 eor=GET_LR_FROM_LEVEL(level);
2762             }
2763 
2764             do {
2765                 /* determine start and limit of the run (end points just behind the run) */
2766 
2767                 /* the values for this run's start are the same as for the previous run's end */
2768                 start=limit;
2769                 level=nextLevel;
2770                 if((start>0) && (dirProps[start-1]==B)) {
2771                     /* except if this is a new paragraph, then set sor = para level */
2772                     sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2773                 } else {
2774                     sor=eor;
2775                 }
2776 
2777                 /* search for the limit of this run */
2778                 while((++limit<length) &&
2779                       ((levels[limit]==level) ||
2780                        (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
2781 
2782                 /* get the correct level of the next run */
2783                 if(limit<length) {
2784                     nextLevel=levels[limit];
2785                 } else {
2786                     nextLevel=GET_PARALEVEL(pBiDi, length-1);
2787                 }
2788 
2789                 /* determine eor from max(level, nextLevel); sor is last run's eor */
2790                 if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
2791                     eor=GET_LR_FROM_LEVEL(nextLevel);
2792                 } else {
2793                     eor=GET_LR_FROM_LEVEL(level);
2794                 }
2795 
2796                 /* if the run consists of overridden directional types, then there
2797                    are no implicit types to be resolved */
2798                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2799                     resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2800                 } else {
2801                     /* remove the UBIDI_LEVEL_OVERRIDE flags */
2802                     do {
2803                         levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2804                     } while(start<limit);
2805                 }
2806             } while(limit<length);
2807         }
2808         /* check if we got any memory shortage while adding insert points */
2809         if (U_FAILURE(pBiDi->insertPoints.errorCode))
2810         {
2811             *pErrorCode=pBiDi->insertPoints.errorCode;
2812             return;
2813         }
2814         /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2815         adjustWSLevels(pBiDi);
2816         break;
2817     }
2818     /* add RLM for inverse Bidi with contextual orientation resolving
2819      * to RTL which would not round-trip otherwise
2820      */
2821     if((pBiDi->defaultParaLevel>0) &&
2822        (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2823        ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2824         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2825         int32_t i, j, start, last;
2826         UBiDiLevel level;
2827         DirProp dirProp;
2828         for(i=0; i<pBiDi->paraCount; i++) {
2829             last=(pBiDi->paras[i].limit)-1;
2830             level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
2831             if(level==0)
2832                 continue;           /* LTR paragraph */
2833             start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2834             for(j=last; j>=start; j--) {
2835                 dirProp=dirProps[j];
2836                 if(dirProp==L) {
2837                     if(j<last) {
2838                         while(dirProps[last]==B) {
2839                             last--;
2840                         }
2841                     }
2842                     addPoint(pBiDi, last, RLM_BEFORE);
2843                     break;
2844                 }
2845                 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2846                     break;
2847                 }
2848             }
2849         }
2850     }
2851 
2852     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2853         pBiDi->resultLength -= pBiDi->controlCount;
2854     } else {
2855         pBiDi->resultLength += pBiDi->insertPoints.size;
2856     }
2857     setParaSuccess(pBiDi);              /* mark successful setPara */
2858 }
2859 
2860 U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi * pBiDi,UBool orderParagraphsLTR)2861 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2862     if(pBiDi!=NULL) {
2863         pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2864     }
2865 }
2866 
2867 U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi * pBiDi)2868 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2869     if(pBiDi!=NULL) {
2870         return pBiDi->orderParagraphsLTR;
2871     } else {
2872         return FALSE;
2873     }
2874 }
2875 
2876 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi * pBiDi)2877 ubidi_getDirection(const UBiDi *pBiDi) {
2878     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2879         return pBiDi->direction;
2880     } else {
2881         return UBIDI_LTR;
2882     }
2883 }
2884 
2885 U_CAPI const UChar * U_EXPORT2
ubidi_getText(const UBiDi * pBiDi)2886 ubidi_getText(const UBiDi *pBiDi) {
2887     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2888         return pBiDi->text;
2889     } else {
2890         return NULL;
2891     }
2892 }
2893 
2894 U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi * pBiDi)2895 ubidi_getLength(const UBiDi *pBiDi) {
2896     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2897         return pBiDi->originalLength;
2898     } else {
2899         return 0;
2900     }
2901 }
2902 
2903 U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi * pBiDi)2904 ubidi_getProcessedLength(const UBiDi *pBiDi) {
2905     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2906         return pBiDi->length;
2907     } else {
2908         return 0;
2909     }
2910 }
2911 
2912 U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi * pBiDi)2913 ubidi_getResultLength(const UBiDi *pBiDi) {
2914     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2915         return pBiDi->resultLength;
2916     } else {
2917         return 0;
2918     }
2919 }
2920 
2921 /* paragraphs API functions ------------------------------------------------- */
2922 
2923 U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi * pBiDi)2924 ubidi_getParaLevel(const UBiDi *pBiDi) {
2925     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2926         return pBiDi->paraLevel;
2927     } else {
2928         return 0;
2929     }
2930 }
2931 
2932 U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi * pBiDi)2933 ubidi_countParagraphs(UBiDi *pBiDi) {
2934     if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2935         return 0;
2936     } else {
2937         return pBiDi->paraCount;
2938     }
2939 }
2940 
2941 U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi * pBiDi,int32_t paraIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2942 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2943                           int32_t *pParaStart, int32_t *pParaLimit,
2944                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2945     int32_t paraStart;
2946 
2947     /* check the argument values */
2948     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2949     RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2950     RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2951 
2952     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2953     if(paraIndex) {
2954         paraStart=pBiDi->paras[paraIndex-1].limit;
2955     } else {
2956         paraStart=0;
2957     }
2958     if(pParaStart!=NULL) {
2959         *pParaStart=paraStart;
2960     }
2961     if(pParaLimit!=NULL) {
2962         *pParaLimit=pBiDi->paras[paraIndex].limit;
2963     }
2964     if(pParaLevel!=NULL) {
2965         *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2966     }
2967 }
2968 
2969 U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi * pBiDi,int32_t charIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2970 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2971                           int32_t *pParaStart, int32_t *pParaLimit,
2972                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2973     int32_t paraIndex;
2974 
2975     /* check the argument values */
2976     /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2977     RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2978     RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2979     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2980     RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2981 
2982     for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
2983     ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2984     return paraIndex;
2985 }
2986 
2987 U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi * pBiDi,UBiDiClassCallback * newFn,const void * newContext,UBiDiClassCallback ** oldFn,const void ** oldContext,UErrorCode * pErrorCode)2988 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2989                        const void *newContext, UBiDiClassCallback **oldFn,
2990                        const void **oldContext, UErrorCode *pErrorCode)
2991 {
2992     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2993     if(pBiDi==NULL) {
2994         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2995         return;
2996     }
2997     if( oldFn )
2998     {
2999         *oldFn = pBiDi->fnClassCallback;
3000     }
3001     if( oldContext )
3002     {
3003         *oldContext = pBiDi->coClassCallback;
3004     }
3005     pBiDi->fnClassCallback = newFn;
3006     pBiDi->coClassCallback = newContext;
3007 }
3008 
3009 U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi * pBiDi,UBiDiClassCallback ** fn,const void ** context)3010 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
3011 {
3012     if(pBiDi==NULL) {
3013         return;
3014     }
3015     if( fn )
3016     {
3017         *fn = pBiDi->fnClassCallback;
3018     }
3019     if( context )
3020     {
3021         *context = pBiDi->coClassCallback;
3022     }
3023 }
3024 
3025 U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi * pBiDi,UChar32 c)3026 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3027 {
3028     UCharDirection dir;
3029 
3030     if( pBiDi->fnClassCallback == NULL ||
3031         (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3032     {
3033         dir = ubidi_getClass(c);
3034     }
3035     if(dir >= U_CHAR_DIRECTION_COUNT) {
3036         dir = (UCharDirection)ON;
3037     }
3038     return dir;
3039 }
3040