• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1999-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *   file name:  ubidi.c
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 1999jul27
14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
15 */
16 
17 #include "cmemory.h"
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ubidi.h"
22 #include "ubidi_props.h"
23 #include "ubidiimp.h"
24 #include "uassert.h"
25 
26 /*
27  * General implementation notes:
28  *
29  * Throughout the implementation, there are comments like (W2) that refer to
30  * rules of the BiDi algorithm in its version 5, in this example to the second
31  * rule of the resolution of weak types.
32  *
33  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
34  * character according to UTF-16, the second UChar gets the directional property of
35  * the entire character assigned, while the first one gets a BN, a boundary
36  * neutral, type, which is ignored by most of the algorithm according to
37  * rule (X9) and the implementation suggestions of the BiDi algorithm.
38  *
39  * Later, adjustWSLevels() will set the level for each BN to that of the
40  * following character (UChar), which results in surrogate pairs getting the
41  * same level on each of their surrogates.
42  *
43  * In a UTF-8 implementation, the same thing could be done: the last byte of
44  * a multi-byte sequence would get the "real" property, while all previous
45  * bytes of that sequence would get BN.
46  *
47  * It is not possible to assign all those parts of a character the same real
48  * property because this would fail in the resolution of weak types with rules
49  * that look at immediately surrounding types.
50  *
51  * As a related topic, this implementation does not remove Boundary Neutral
52  * types from the input, but ignores them wherever this is relevant.
53  * For example, the loop for the resolution of the weak types reads
54  * types until it finds a non-BN.
55  * Also, explicit embedding codes are neither changed into BN nor removed.
56  * They are only treated the same way real BNs are.
57  * As stated before, adjustWSLevels() takes care of them at the end.
58  * For the purpose of conformance, the levels of all these codes
59  * do not matter.
60  *
61  * Note that this implementation never modifies the dirProps
62  * after the initial setup.
63  *
64  *
65  * In this implementation, the resolution of weak types (Wn),
66  * neutrals (Nn), and the assignment of the resolved level (In)
67  * are all done in one single loop, in resolveImplicitLevels().
68  * Changes of dirProp values are done on the fly, without writing
69  * them back to the dirProps array.
70  *
71  *
72  * This implementation contains code that allows to bypass steps of the
73  * algorithm that are not needed on the specific paragraph
74  * in order to speed up the most common cases considerably,
75  * like text that is entirely LTR, or RTL text without numbers.
76  *
77  * Most of this is done by setting a bit for each directional property
78  * in a flags variable and later checking for whether there are
79  * any LTR characters or any RTL characters, or both, whether
80  * there are any explicit embedding codes, etc.
81  *
82  * If the (Xn) steps are performed, then the flags are re-evaluated,
83  * because they will then not contain the embedding codes any more
84  * and will be adjusted for override codes, so that subsequently
85  * more bypassing may be possible than what the initial flags suggested.
86  *
87  * If the text is not mixed-directional, then the
88  * algorithm steps for the weak type resolution are not performed,
89  * and all levels are set to the paragraph level.
90  *
91  * If there are no explicit embedding codes, then the (Xn) steps
92  * are not performed.
93  *
94  * If embedding levels are supplied as a parameter, then all
95  * explicit embedding codes are ignored, and the (Xn) steps
96  * are not performed.
97  *
98  * White Space types could get the level of the run they belong to,
99  * and are checked with a test of (flags&MASK_EMBEDDING) to
100  * consider if the paragraph direction should be considered in
101  * the flags variable.
102  *
103  * If there are no White Space types in the paragraph, then
104  * (L1) is not necessary in adjustWSLevels().
105  */
106 
107 /* to avoid some conditional statements, use tiny constant arrays */
108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
111 
112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
113 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
114 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
115 
116 /* UBiDi object management -------------------------------------------------- */
117 
118 U_CAPI UBiDi * U_EXPORT2
ubidi_open(void)119 ubidi_open(void)
120 {
121     UErrorCode errorCode=U_ZERO_ERROR;
122     return ubidi_openSized(0, 0, &errorCode);
123 }
124 
125 U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
127     UBiDi *pBiDi;
128 
129     /* check the argument values */
130     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
131         return NULL;
132     } else if(maxLength<0 || maxRunCount<0) {
133         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
134         return NULL;    /* invalid arguments */
135     }
136 
137     /* allocate memory for the object */
138     pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
139     if(pBiDi==NULL) {
140         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
141         return NULL;
142     }
143 
144     /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
145     uprv_memset(pBiDi, 0, sizeof(UBiDi));
146 
147     /* get BiDi properties */
148     pBiDi->bdp=ubidi_getSingleton();
149 
150     /* allocate memory for arrays as requested */
151     if(maxLength>0) {
152         if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
153             !getInitialLevelsMemory(pBiDi, maxLength)
154         ) {
155             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
156         }
157     } else {
158         pBiDi->mayAllocateText=TRUE;
159     }
160 
161     if(maxRunCount>0) {
162         if(maxRunCount==1) {
163             /* use simpleRuns[] */
164             pBiDi->runsSize=sizeof(Run);
165         } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
166             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
167         }
168     } else {
169         pBiDi->mayAllocateRuns=TRUE;
170     }
171 
172     if(U_SUCCESS(*pErrorCode)) {
173         return pBiDi;
174     } else {
175         ubidi_close(pBiDi);
176         return NULL;
177     }
178 }
179 
180 /*
181  * We are allowed to allocate memory if memory==NULL or
182  * mayAllocate==TRUE for each array that we need.
183  * We also try to grow memory as needed if we
184  * allocate it.
185  *
186  * Assume sizeNeeded>0.
187  * If *pMemory!=NULL, then assume *pSize>0.
188  *
189  * ### this realloc() may unnecessarily copy the old data,
190  * which we know we don't need any more;
191  * is this the best way to do this??
192  */
193 U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation * bidiMem,int32_t * pSize,UBool mayAllocate,int32_t sizeNeeded)194 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
195     void **pMemory = (void **)bidiMem;
196     /* check for existing memory */
197     if(*pMemory==NULL) {
198         /* we need to allocate memory */
199         if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
200             *pSize=sizeNeeded;
201             return TRUE;
202         } else {
203             return FALSE;
204         }
205     } else {
206         if(sizeNeeded<=*pSize) {
207             /* there is already enough memory */
208             return TRUE;
209         }
210         else if(!mayAllocate) {
211             /* not enough memory, and we must not allocate */
212             return FALSE;
213         } else {
214             /* we try to grow */
215             void *memory;
216             /* in most cases, we do not need the copy-old-data part of
217              * realloc, but it is needed when adding runs using getRunsMemory()
218              * in setParaRunsOnly()
219              */
220             if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
221                 *pMemory=memory;
222                 *pSize=sizeNeeded;
223                 return TRUE;
224             } else {
225                 /* we failed to grow */
226                 return FALSE;
227             }
228         }
229     }
230 }
231 
232 U_CAPI void U_EXPORT2
ubidi_close(UBiDi * pBiDi)233 ubidi_close(UBiDi *pBiDi) {
234     if(pBiDi!=NULL) {
235         pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
236         if(pBiDi->dirPropsMemory!=NULL) {
237             uprv_free(pBiDi->dirPropsMemory);
238         }
239         if(pBiDi->levelsMemory!=NULL) {
240             uprv_free(pBiDi->levelsMemory);
241         }
242         if(pBiDi->runsMemory!=NULL) {
243             uprv_free(pBiDi->runsMemory);
244         }
245         if(pBiDi->parasMemory!=NULL) {
246             uprv_free(pBiDi->parasMemory);
247         }
248         if(pBiDi->insertPoints.points!=NULL) {
249             uprv_free(pBiDi->insertPoints.points);
250         }
251 
252         uprv_free(pBiDi);
253     }
254 }
255 
256 /* set to approximate "inverse BiDi" ---------------------------------------- */
257 
258 U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi * pBiDi,UBool isInverse)259 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
260     if(pBiDi!=NULL) {
261         pBiDi->isInverse=isInverse;
262         pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
263                                           : UBIDI_REORDER_DEFAULT;
264     }
265 }
266 
267 U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi * pBiDi)268 ubidi_isInverse(UBiDi *pBiDi) {
269     if(pBiDi!=NULL) {
270         return pBiDi->isInverse;
271     } else {
272         return FALSE;
273     }
274 }
275 
276 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
277  * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
278  * concept of RUNS_ONLY which is a double operation.
279  * It could be advantageous to divide this into 3 concepts:
280  * a) Operation: direct / inverse / RUNS_ONLY
281  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
282  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
283  * This would allow combinations not possible today like RUNS_ONLY with
284  * NUMBERS_SPECIAL.
285  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
286  * REMOVE_CONTROLS for the inverse step.
287  * Not all combinations would be supported, and probably not all do make sense.
288  * This would need to document which ones are supported and what are the
289  * fallbacks for unsupported combinations.
290  */
291 U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi * pBiDi,UBiDiReorderingMode reorderingMode)292 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
293     if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
294                         && (reorderingMode < UBIDI_REORDER_COUNT)) {
295         pBiDi->reorderingMode = reorderingMode;
296         pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
297     }
298 }
299 
300 U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi * pBiDi)301 ubidi_getReorderingMode(UBiDi *pBiDi) {
302     if (pBiDi!=NULL) {
303         return pBiDi->reorderingMode;
304     } else {
305         return UBIDI_REORDER_DEFAULT;
306     }
307 }
308 
309 U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi * pBiDi,uint32_t reorderingOptions)310 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
311     if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
312         reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
313     }
314     if (pBiDi!=NULL) {
315         pBiDi->reorderingOptions=reorderingOptions;
316     }
317 }
318 
319 U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi * pBiDi)320 ubidi_getReorderingOptions(UBiDi *pBiDi) {
321     if (pBiDi!=NULL) {
322         return pBiDi->reorderingOptions;
323     } else {
324         return 0;
325     }
326 }
327 
328 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getBaseDirection(const UChar * text,int32_t length)329 ubidi_getBaseDirection(const UChar *text,
330 int32_t length){
331 
332     int32_t i;
333     UChar32 uchar;
334     UCharDirection dir;
335 
336     if( text==NULL || length<-1 ){
337         return UBIDI_NEUTRAL;
338     }
339 
340     if(length==-1) {
341         length=u_strlen(text);
342     }
343 
344     for( i = 0 ; i < length; ) {
345         /* i is incremented by U16_NEXT */
346         U16_NEXT(text, i, length, uchar);
347         dir = u_charDirection(uchar);
348         if( dir == U_LEFT_TO_RIGHT )
349                 return UBIDI_LTR;
350         if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
351                 return UBIDI_RTL;
352     }
353     return UBIDI_NEUTRAL;
354 }
355 
356 /* perform (P2)..(P3) ------------------------------------------------------- */
357 
358 static DirProp
firstL_R_AL(UBiDi * pBiDi)359 firstL_R_AL(UBiDi *pBiDi) {
360     /* return first strong char after the last B in prologue if any */
361     const UChar *text=pBiDi->prologue;
362     int32_t length=pBiDi->proLength;
363     int32_t i;
364     UChar32 uchar;
365     DirProp dirProp, result=ON;
366     for(i=0; i<length; ) {
367         /* i is incremented by U16_NEXT */
368         U16_NEXT(text, i, length, uchar);
369         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
370         if(result==ON) {
371             if(dirProp==L || dirProp==R || dirProp==AL) {
372                 result=dirProp;
373             }
374         } else {
375             if(dirProp==B) {
376                 result=ON;
377             }
378         }
379     }
380     return result;
381 }
382 
383 /*
384  * Get the directional properties for the text,
385  * calculate the flags bit-set, and
386  * determine the paragraph level if necessary.
387  */
388 static void
getDirProps(UBiDi * pBiDi)389 getDirProps(UBiDi *pBiDi) {
390     const UChar *text=pBiDi->text;
391     DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
392 
393     int32_t i=0, i1, length=pBiDi->originalLength;
394     Flags flags=0;      /* collect all directionalities in the text */
395     UChar32 uchar;
396     DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */
397     UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
398     /* for inverse BiDi, the default para level is set to RTL if there is a
399        strong R or AL character at either end of the text                           */
400     UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
401             (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
402              pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
403     int32_t lastArabicPos=-1;
404     int32_t controlCount=0;
405     UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
406                                        UBIDI_OPTION_REMOVE_CONTROLS);
407 
408     typedef enum {
409          NOT_CONTEXTUAL,                /* 0: not contextual paraLevel */
410          LOOKING_FOR_STRONG,            /* 1: looking for first strong char */
411          FOUND_STRONG_CHAR              /* 2: found first strong char       */
412     } State;
413     State state;
414     int32_t paraStart=0;                /* index of first char in paragraph */
415     DirProp paraDir;                    /* == CONTEXT_RTL within paragraphs
416                                            starting with strong R char      */
417     DirProp lastStrongDir=0;            /* for default level & inverse BiDi */
418     int32_t lastStrongLTR=0;            /* for STREAMING option             */
419 
420     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
421         pBiDi->length=0;
422         lastStrongLTR=0;
423     }
424     if(isDefaultLevel) {
425         DirProp lastStrong;
426         paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0;
427         if(pBiDi->proLength>0 &&
428            (lastStrong=firstL_R_AL(pBiDi))!=ON) {
429             paraDir=(lastStrong==L) ? 0 : CONTEXT_RTL;
430             state=FOUND_STRONG_CHAR;
431         } else {
432             paraDir=paraDirDefault;
433             state=LOOKING_FOR_STRONG;
434         }
435         lastStrongDir=paraDir;
436     } else {
437         state=NOT_CONTEXTUAL;
438         paraDir=0;
439     }
440     /* count paragraphs and determine the paragraph level (P2..P3) */
441     /*
442      * see comment in ubidi.h:
443      * the DEFAULT_XXX values are designed so that
444      * their bit 0 alone yields the intended default
445      */
446     for( /* i=0 above */ ; i<length; ) {
447         /* i is incremented by U16_NEXT */
448         U16_NEXT(text, i, length, uchar);
449         flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
450         dirProps[i-1]=dirProp|paraDir;
451         if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
452             flags|=DIRPROP_FLAG(BN);
453             dirProps[i-2]=(DirProp)(BN|paraDir);
454         }
455         if(state==LOOKING_FOR_STRONG) {
456             if(dirProp==L) {
457                 state=FOUND_STRONG_CHAR;
458                 if(paraDir) {
459                     paraDir=0;
460                     for(i1=paraStart; i1<i; i1++) {
461                         dirProps[i1]&=~CONTEXT_RTL;
462                     }
463                 }
464                 continue;
465             }
466             if(dirProp==R || dirProp==AL) {
467                 state=FOUND_STRONG_CHAR;
468                 if(paraDir==0) {
469                     paraDir=CONTEXT_RTL;
470                     for(i1=paraStart; i1<i; i1++) {
471                         dirProps[i1]|=CONTEXT_RTL;
472                     }
473                 }
474                 continue;
475             }
476         }
477         if(dirProp==L) {
478             lastStrongDir=0;
479             lastStrongLTR=i;            /* i is index to next character */
480         }
481         else if(dirProp==R) {
482             lastStrongDir=CONTEXT_RTL;
483         }
484         else if(dirProp==AL) {
485             lastStrongDir=CONTEXT_RTL;
486             lastArabicPos=i-1;
487         }
488         else if(dirProp==B) {
489             if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
490                 pBiDi->length=i;        /* i is index to next character */
491             }
492             if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
493                 for( ; paraStart<i; paraStart++) {
494                     dirProps[paraStart]|=CONTEXT_RTL;
495                 }
496             }
497             if(i<length) {              /* B not last char in text */
498                 if(!((uchar==CR) && (text[i]==LF))) {
499                     pBiDi->paraCount++;
500                 }
501                 if(isDefaultLevel) {
502                     state=LOOKING_FOR_STRONG;
503                     paraStart=i;        /* i is index to next character */
504                     paraDir=paraDirDefault;
505                     lastStrongDir=paraDirDefault;
506                 }
507             }
508         }
509         if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) {
510             controlCount++;
511         }
512     }
513     if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
514         for(i1=paraStart; i1<length; i1++) {
515             dirProps[i1]|=CONTEXT_RTL;
516         }
517     }
518     if(isDefaultLevel) {
519         pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0);
520     }
521     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
522         if((lastStrongLTR>pBiDi->length) &&
523            (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) {
524             pBiDi->length = lastStrongLTR;
525         }
526         if(pBiDi->length<pBiDi->originalLength) {
527             pBiDi->paraCount--;
528         }
529     }
530     /* The following line does nothing new for contextual paraLevel, but is
531        needed for absolute paraLevel.                               */
532     flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
533 
534     if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
535         flags|=DIRPROP_FLAG(L);
536     }
537 
538     pBiDi->controlCount = controlCount;
539     pBiDi->flags=flags;
540     pBiDi->lastArabicPos=lastArabicPos;
541 }
542 
543 /* perform (X1)..(X9) ------------------------------------------------------- */
544 
545 /* determine if the text is mixed-directional or single-directional */
546 static UBiDiDirection
directionFromFlags(UBiDi * pBiDi)547 directionFromFlags(UBiDi *pBiDi) {
548     Flags flags=pBiDi->flags;
549     /* if the text contains AN and neutrals, then some neutrals may become RTL */
550     if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
551         return UBIDI_LTR;
552     } else if(!(flags&MASK_LTR)) {
553         return UBIDI_RTL;
554     } else {
555         return UBIDI_MIXED;
556     }
557 }
558 
559 /*
560  * Resolve the explicit levels as specified by explicit embedding codes.
561  * Recalculate the flags to have them reflect the real properties
562  * after taking the explicit embeddings into account.
563  *
564  * The BiDi algorithm is designed to result in the same behavior whether embedding
565  * levels are externally specified (from "styled text", supposedly the preferred
566  * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
567  * That is why (X9) instructs to remove all explicit codes (and BN).
568  * However, in a real implementation, this removal of these codes and their index
569  * positions in the plain text is undesirable since it would result in
570  * reallocated, reindexed text.
571  * Instead, this implementation leaves the codes in there and just ignores them
572  * in the subsequent processing.
573  * In order to get the same reordering behavior, positions with a BN or an
574  * explicit embedding code just get the same level assigned as the last "real"
575  * character.
576  *
577  * Some implementations, not this one, then overwrite some of these
578  * directionality properties at "real" same-level-run boundaries by
579  * L or R codes so that the resolution of weak types can be performed on the
580  * entire paragraph at once instead of having to parse it once more and
581  * perform that resolution on same-level-runs.
582  * This limits the scope of the implicit rules in effectively
583  * the same way as the run limits.
584  *
585  * Instead, this implementation does not modify these codes.
586  * On one hand, the paragraph has to be scanned for same-level-runs, but
587  * on the other hand, this saves another loop to reset these codes,
588  * or saves making and modifying a copy of dirProps[].
589  *
590  *
591  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
592  *
593  *
594  * Handling the stack of explicit levels (Xn):
595  *
596  * With the BiDi stack of explicit levels,
597  * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
598  * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
599  *
600  * In order to have a correct push-pop semantics even in the case of overflows,
601  * there are two overflow counters:
602  * - countOver60 is incremented with each LRx at level 60
603  * - from level 60, one RLx increases the level to 61
604  * - countOver61 is incremented with each LRx and RLx at level 61
605  *
606  * Popping levels with PDF must work in the opposite order so that level 61
607  * is correct at the correct point. Underflows (too many PDFs) must be checked.
608  *
609  * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
610  */
611 static UBiDiDirection
resolveExplicitLevels(UBiDi * pBiDi)612 resolveExplicitLevels(UBiDi *pBiDi) {
613     const DirProp *dirProps=pBiDi->dirProps;
614     UBiDiLevel *levels=pBiDi->levels;
615     const UChar *text=pBiDi->text;
616 
617     int32_t i=0, length=pBiDi->length;
618     Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
619     DirProp dirProp;
620     UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
621 
622     UBiDiDirection direction;
623     int32_t paraIndex=0;
624 
625     /* determine if the text is mixed-directional or single-directional */
626     direction=directionFromFlags(pBiDi);
627 
628     /* we may not need to resolve any explicit levels, but for multiple
629        paragraphs we want to loop on all chars to set the para boundaries */
630     if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) {
631         /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
632     } else if((pBiDi->paraCount==1) &&
633               (!(flags&MASK_EXPLICIT) ||
634                (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) {
635         /* mixed, but all characters are at the same embedding level */
636         /* or we are in "inverse BiDi" */
637         /* and we don't have contextual multiple paragraphs with some B char */
638         /* set all levels to the paragraph level */
639         for(i=0; i<length; ++i) {
640             levels[i]=level;
641         }
642     } else {
643         /* continue to perform (Xn) */
644 
645         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
646         /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
647         UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
648 
649         UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL];        /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
650         uint32_t countOver60=0, countOver61=0;  /* count overflows of explicit levels */
651 
652         /* recalculate the flags */
653         flags=0;
654 
655         for(i=0; i<length; ++i) {
656             dirProp=NO_CONTEXT_RTL(dirProps[i]);
657             switch(dirProp) {
658             case LRE:
659             case LRO:
660                 /* (X3, X5) */
661                 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
662                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
663                     stack[stackTop]=embeddingLevel;
664                     ++stackTop;
665                     embeddingLevel=newLevel;
666                     if(dirProp==LRO) {
667                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
668                     }
669                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
670                        since this has already been done for newLevel which is
671                        the source for embeddingLevel.
672                      */
673                 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
674                     ++countOver61;
675                 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
676                     ++countOver60;
677                 }
678                 flags|=DIRPROP_FLAG(BN);
679                 break;
680             case RLE:
681             case RLO:
682                 /* (X2, X4) */
683                 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
684                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
685                     stack[stackTop]=embeddingLevel;
686                     ++stackTop;
687                     embeddingLevel=newLevel;
688                     if(dirProp==RLO) {
689                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
690                     }
691                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
692                        since this has already been done for newLevel which is
693                        the source for embeddingLevel.
694                      */
695                 } else {
696                     ++countOver61;
697                 }
698                 flags|=DIRPROP_FLAG(BN);
699                 break;
700             case PDF:
701                 /* (X7) */
702                 /* handle all the overflow cases first */
703                 if(countOver61>0) {
704                     --countOver61;
705                 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
706                     /* handle LRx overflows from level 60 */
707                     --countOver60;
708                 } else if(stackTop>0) {
709                     /* this is the pop operation; it also pops level 61 while countOver60>0 */
710                     --stackTop;
711                     embeddingLevel=stack[stackTop];
712                 /* } else { (underflow) */
713                 }
714                 flags|=DIRPROP_FLAG(BN);
715                 break;
716             case B:
717                 stackTop=0;
718                 countOver60=countOver61=0;
719                 level=GET_PARALEVEL(pBiDi, i);
720                 if((i+1)<length) {
721                     embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
722                     if(!((text[i]==CR) && (text[i+1]==LF))) {
723                         pBiDi->paras[paraIndex++]=i+1;
724                     }
725                 }
726                 flags|=DIRPROP_FLAG(B);
727                 break;
728             case BN:
729                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
730                 /* they will get their levels set correctly in adjustWSLevels() */
731                 flags|=DIRPROP_FLAG(BN);
732                 break;
733             default:
734                 /* all other types get the "real" level */
735                 if(level!=embeddingLevel) {
736                     level=embeddingLevel;
737                     if(level&UBIDI_LEVEL_OVERRIDE) {
738                         flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
739                     } else {
740                         flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
741                     }
742                 }
743                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
744                     flags|=DIRPROP_FLAG(dirProp);
745                 }
746                 break;
747             }
748 
749             /*
750              * We need to set reasonable levels even on BN codes and
751              * explicit codes because we will later look at same-level runs (X10).
752              */
753             levels[i]=level;
754         }
755         if(flags&MASK_EMBEDDING) {
756             flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
757         }
758         if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
759             flags|=DIRPROP_FLAG(L);
760         }
761 
762         /* subsequently, ignore the explicit codes and BN (X9) */
763 
764         /* again, determine if the text is mixed-directional or single-directional */
765         pBiDi->flags=flags;
766         direction=directionFromFlags(pBiDi);
767     }
768 
769     return direction;
770 }
771 
772 /*
773  * Use a pre-specified embedding levels array:
774  *
775  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
776  * ignore all explicit codes (X9),
777  * and check all the preset levels.
778  *
779  * Recalculate the flags to have them reflect the real properties
780  * after taking the explicit embeddings into account.
781  */
782 static UBiDiDirection
checkExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)783 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
784     const DirProp *dirProps=pBiDi->dirProps;
785     DirProp dirProp;
786     UBiDiLevel *levels=pBiDi->levels;
787     const UChar *text=pBiDi->text;
788 
789     int32_t i, length=pBiDi->length;
790     Flags flags=0;  /* collect all directionalities in the text */
791     UBiDiLevel level;
792     uint32_t paraIndex=0;
793 
794     for(i=0; i<length; ++i) {
795         level=levels[i];
796         dirProp=NO_CONTEXT_RTL(dirProps[i]);
797         if(level&UBIDI_LEVEL_OVERRIDE) {
798             /* keep the override flag in levels[i] but adjust the flags */
799             level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
800             flags|=DIRPROP_FLAG_O(level);
801         } else {
802             /* set the flags */
803             flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
804         }
805         if((level<GET_PARALEVEL(pBiDi, i) &&
806             !((0==level)&&(dirProp==B))) ||
807            (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
808             /* level out of bounds */
809             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
810             return UBIDI_LTR;
811         }
812         if((dirProp==B) && ((i+1)<length)) {
813             if(!((text[i]==CR) && (text[i+1]==LF))) {
814                 pBiDi->paras[paraIndex++]=i+1;
815             }
816         }
817     }
818     if(flags&MASK_EMBEDDING) {
819         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
820     }
821 
822     /* determine if the text is mixed-directional or single-directional */
823     pBiDi->flags=flags;
824     return directionFromFlags(pBiDi);
825 }
826 
827 /******************************************************************
828  The Properties state machine table
829 *******************************************************************
830 
831  All table cells are 8 bits:
832       bits 0..4:  next state
833       bits 5..7:  action to perform (if > 0)
834 
835  Cells may be of format "n" where n represents the next state
836  (except for the rightmost column).
837  Cells may also be of format "s(x,y)" where x represents an action
838  to perform and y represents the next state.
839 
840 *******************************************************************
841  Definitions and type for properties state table
842 *******************************************************************
843 */
844 #define IMPTABPROPS_COLUMNS 14
845 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
846 #define GET_STATEPROPS(cell) ((cell)&0x1f)
847 #define GET_ACTIONPROPS(cell) ((cell)>>5)
848 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
849 
850 static const uint8_t groupProp[] =          /* dirProp regrouped */
851 {
852 /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
853     0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
854 };
855 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
856 
857 /******************************************************************
858 
859       PROPERTIES  STATE  TABLE
860 
861  In table impTabProps,
862       - the ON column regroups ON and WS
863       - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
864       - the Res column is the reduced property assigned to a run
865 
866  Action 1: process current run1, init new run1
867         2: init new run2
868         3: process run1, process run2, init new run1
869         4: process run1, set run1=run2, init new run2
870 
871  Notes:
872   1) This table is used in resolveImplicitLevels().
873   2) This table triggers actions when there is a change in the Bidi
874      property of incoming characters (action 1).
875   3) Most such property sequences are processed immediately (in
876      fact, passed to processPropertySeq().
877   4) However, numbers are assembled as one sequence. This means
878      that undefined situations (like CS following digits, until
879      it is known if the next char will be a digit) are held until
880      following chars define them.
881      Example: digits followed by CS, then comes another CS or ON;
882               the digits will be processed, then the CS assigned
883               as the start of an ON sequence (action 3).
884   5) There are cases where more than one sequence must be
885      processed, for instance digits followed by CS followed by L:
886      the digits must be processed as one sequence, and the CS
887      must be processed as an ON sequence, all this before starting
888      assembling chars for the opening L sequence.
889 
890 
891 */
892 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
893 {
894 /*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,  Res */
895 /* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,  DirProp_ON },
896 /* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),   DirProp_L },
897 /* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),   DirProp_R },
898 /* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,   DirProp_R },
899 /* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),  DirProp_EN },
900 /* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),  DirProp_AN },
901 /* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),  DirProp_AN },
902 /* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),  DirProp_ON },
903 /* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),  DirProp_ON },
904 /* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),  DirProp_ON },
905 /*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),  DirProp_EN },
906 /*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),  DirProp_EN },
907 /*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),  DirProp_AN },
908 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),  DirProp_AN },
909 /*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),  DirProp_ON },
910 /*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),   DirProp_S },
911 /*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),   DirProp_S },
912 /*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),   DirProp_B }
913 };
914 
915 /*  we must undef macro s because the levels table have a different
916  *  structure (4 bits for action and 4 bits for next state.
917  */
918 #undef s
919 
920 /******************************************************************
921  The levels state machine tables
922 *******************************************************************
923 
924  All table cells are 8 bits:
925       bits 0..3:  next state
926       bits 4..7:  action to perform (if > 0)
927 
928  Cells may be of format "n" where n represents the next state
929  (except for the rightmost column).
930  Cells may also be of format "s(x,y)" where x represents an action
931  to perform and y represents the next state.
932 
933  This format limits each table to 16 states each and to 15 actions.
934 
935 *******************************************************************
936  Definitions and type for levels state tables
937 *******************************************************************
938 */
939 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
940 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
941 #define GET_STATE(cell) ((cell)&0x0f)
942 #define GET_ACTION(cell) ((cell)>>4)
943 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
944 
945 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
946 typedef uint8_t ImpAct[];
947 
948 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
949  * instead of having a pair of ImpTab and a pair of ImpAct.
950  */
951 typedef struct ImpTabPair {
952     const void * pImpTab[2];
953     const void * pImpAct[2];
954 } ImpTabPair;
955 
956 /******************************************************************
957 
958       LEVELS  STATE  TABLES
959 
960  In all levels state tables,
961       - state 0 is the initial state
962       - the Res column is the increment to add to the text level
963         for this property sequence.
964 
965  The impAct arrays for each table of a pair map the local action
966  numbers of the table to the total list of actions. For instance,
967  action 2 in a given table corresponds to the action number which
968  appears in entry [2] of the impAct array for that table.
969  The first entry of all impAct arrays must be 0.
970 
971  Action 1: init conditional sequence
972         2: prepend conditional sequence to current sequence
973         3: set ON sequence to new level - 1
974         4: init EN/AN/ON sequence
975         5: fix EN/AN/ON sequence followed by R
976         6: set previous level sequence to level 2
977 
978  Notes:
979   1) These tables are used in processPropertySeq(). The input
980      is property sequences as determined by resolveImplicitLevels.
981   2) Most such property sequences are processed immediately
982      (levels are assigned).
983   3) However, some sequences cannot be assigned a final level till
984      one or more following sequences are received. For instance,
985      ON following an R sequence within an even-level paragraph.
986      If the following sequence is R, the ON sequence will be
987      assigned basic run level+1, and so will the R sequence.
988   4) S is generally handled like ON, since its level will be fixed
989      to paragraph level in adjustWSLevels().
990 
991 */
992 
993 static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
994 /*  In this table, conditional sequences receive the higher possible level
995     until proven otherwise.
996 */
997 {
998 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
999 /* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
1000 /* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1001 /* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
1002 /* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
1003 /* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
1004 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
1005 };
1006 static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
1007 /*  In this table, conditional sequences receive the lower possible level
1008     until proven otherwise.
1009 */
1010 {
1011 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1012 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1013 /* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1014 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1015 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
1016 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
1017 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
1018 };
1019 static const ImpAct impAct0 = {0,1,2,3,4,5,6};
1020 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1021                                            &impTabR_DEFAULT},
1022                                           {&impAct0, &impAct0}};
1023 
1024 static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
1025 /*  In this table, conditional sequences receive the higher possible level
1026     until proven otherwise.
1027 */
1028 {
1029 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1030 /* 0 : init       */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  0 },
1031 /* 1 : L+EN/AN    */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  2 },
1032 /* 2 : R          */ {     0 ,     2 ,    4 ,      4 , s(1,3),     0 ,     0 ,  1 },
1033 /* 3 : R+ON       */ { s(2,0),     2 ,    4 ,      4 ,     3 ,     3 , s(2,0),  1 },
1034 /* 4 : R+EN/AN    */ {     0 ,     2 ,    4 ,      4 , s(1,3), s(1,3),     0 ,  2 }
1035   };
1036 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1037                                                    &impTabR_DEFAULT},
1038                                                   {&impAct0, &impAct0}};
1039 
1040 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1041 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
1042     until proven that there is L or sor/eor on both sides. AN is handled like EN.
1043 */
1044 {
1045 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1046 /* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1047 /* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
1048 /* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
1049 /* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
1050 /* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
1051 /* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
1052 };
1053 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1054 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
1055     until proven that there is L on both sides. AN is handled like EN.
1056 */
1057 {
1058 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1059 /* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1060 /* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1061 /* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
1062 /* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
1063 /* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
1064 };
1065 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1066                         {&impTabL_GROUP_NUMBERS_WITH_R,
1067                          &impTabR_GROUP_NUMBERS_WITH_R},
1068                         {&impAct0, &impAct0}};
1069 
1070 
1071 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1072 /*  This table is identical to the Default LTR table except that EN and AN are
1073     handled like L.
1074 */
1075 {
1076 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1077 /* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
1078 /* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
1079 /* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
1080 /* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
1081 /* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
1082 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
1083 };
1084 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1085 /*  This table is identical to the Default RTL table except that EN and AN are
1086     handled like L.
1087 */
1088 {
1089 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1090 /* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1091 /* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
1092 /* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1093 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
1094 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
1095 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
1096 };
1097 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1098                         {&impTabL_INVERSE_NUMBERS_AS_L,
1099                          &impTabR_INVERSE_NUMBERS_AS_L},
1100                         {&impAct0, &impAct0}};
1101 
1102 static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
1103 /*  In this table, conditional sequences receive the lower possible level
1104     until proven otherwise.
1105 */
1106 {
1107 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1108 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1109 /* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
1110 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1111 /* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
1112 /* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
1113 /* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
1114 /* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
1115 };
1116 static const ImpAct impAct1 = {0,1,11,12};
1117 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1118  */
1119 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1120                         {&impTabL_DEFAULT,
1121                          &impTabR_INVERSE_LIKE_DIRECT},
1122                         {&impAct0, &impAct1}};
1123 
1124 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1125 /*  The case handled in this table is (visually):  R EN L
1126 */
1127 {
1128 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1129 /* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
1130 /* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
1131 /* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
1132 /* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
1133 /* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
1134 /* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
1135 /* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
1136 };
1137 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1138 /*  The cases handled in this table are (visually):  R EN L
1139                                                      R L AN L
1140 */
1141 {
1142 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1143 /* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1144 /* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
1145 /* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
1146 /* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
1147 /* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
1148 /* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
1149 /* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
1150 };
1151 static const ImpAct impAct2 = {0,1,7,8,9,10};
1152 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1153                         {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1154                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1155                         {&impAct0, &impAct2}};
1156 
1157 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1158                         {&impTabL_NUMBERS_SPECIAL,
1159                          &impTabR_INVERSE_LIKE_DIRECT},
1160                         {&impAct0, &impAct1}};
1161 
1162 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1163 /*  The case handled in this table is (visually):  R EN L
1164 */
1165 {
1166 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1167 /* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1168 /* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
1169 /* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
1170 /* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
1171 /* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
1172 };
1173 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1174                         {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1175                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1176                         {&impAct0, &impAct2}};
1177 
1178 #undef s
1179 
1180 typedef struct {
1181     const ImpTab * pImpTab;             /* level table pointer          */
1182     const ImpAct * pImpAct;             /* action map array             */
1183     int32_t startON;                    /* start of ON sequence         */
1184     int32_t startL2EN;                  /* start of level 2 sequence    */
1185     int32_t lastStrongRTL;              /* index of last found R or AL  */
1186     int32_t state;                      /* current state                */
1187     UBiDiLevel runLevel;                /* run level before implicit solving */
1188 } LevState;
1189 
1190 /*------------------------------------------------------------------------*/
1191 
1192 static void
addPoint(UBiDi * pBiDi,int32_t pos,int32_t flag)1193 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1194   /* param pos:     position where to insert
1195      param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1196   */
1197 {
1198 #define FIRSTALLOC  10
1199     Point point;
1200     InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1201 
1202     if (pInsertPoints->capacity == 0)
1203     {
1204         pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1205         if (pInsertPoints->points == NULL)
1206         {
1207             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1208             return;
1209         }
1210         pInsertPoints->capacity=FIRSTALLOC;
1211     }
1212     if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1213     {
1214         void * savePoints=pInsertPoints->points;
1215         pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1216                                            pInsertPoints->capacity*2*sizeof(Point));
1217         if (pInsertPoints->points == NULL)
1218         {
1219             pInsertPoints->points=savePoints;
1220             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1221             return;
1222         }
1223         else  pInsertPoints->capacity*=2;
1224     }
1225     point.pos=pos;
1226     point.flag=flag;
1227     pInsertPoints->points[pInsertPoints->size]=point;
1228     pInsertPoints->size++;
1229 #undef FIRSTALLOC
1230 }
1231 
1232 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1233 
1234 /*
1235  * This implementation of the (Wn) rules applies all rules in one pass.
1236  * In order to do so, it needs a look-ahead of typically 1 character
1237  * (except for W5: sequences of ET) and keeps track of changes
1238  * in a rule Wp that affect a later Wq (p<q).
1239  *
1240  * The (Nn) and (In) rules are also performed in that same single loop,
1241  * but effectively one iteration behind for white space.
1242  *
1243  * Since all implicit rules are performed in one step, it is not necessary
1244  * to actually store the intermediate directional properties in dirProps[].
1245  */
1246 
1247 static void
processPropertySeq(UBiDi * pBiDi,LevState * pLevState,uint8_t _prop,int32_t start,int32_t limit)1248 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1249                    int32_t start, int32_t limit) {
1250     uint8_t cell, oldStateSeq, actionSeq;
1251     const ImpTab * pImpTab=pLevState->pImpTab;
1252     const ImpAct * pImpAct=pLevState->pImpAct;
1253     UBiDiLevel * levels=pBiDi->levels;
1254     UBiDiLevel level, addLevel;
1255     InsertPoints * pInsertPoints;
1256     int32_t start0, k;
1257 
1258     start0=start;                           /* save original start position */
1259     oldStateSeq=(uint8_t)pLevState->state;
1260     cell=(*pImpTab)[oldStateSeq][_prop];
1261     pLevState->state=GET_STATE(cell);       /* isolate the new state */
1262     actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1263     addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1264 
1265     if(actionSeq) {
1266         switch(actionSeq) {
1267         case 1:                         /* init ON seq */
1268             pLevState->startON=start0;
1269             break;
1270 
1271         case 2:                         /* prepend ON seq to current seq */
1272             start=pLevState->startON;
1273             break;
1274 
1275         case 3:                         /* L or S after possible relevant EN/AN */
1276             /* check if we had EN after R/AL */
1277             if (pLevState->startL2EN >= 0) {
1278                 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1279             }
1280             pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1281             /* check if we had any relevant EN/AN after R/AL */
1282             pInsertPoints=&(pBiDi->insertPoints);
1283             if ((pInsertPoints->capacity == 0) ||
1284                 (pInsertPoints->size <= pInsertPoints->confirmed))
1285             {
1286                 /* nothing, just clean up */
1287                 pLevState->lastStrongRTL=-1;
1288                 /* check if we have a pending conditional segment */
1289                 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1290                 if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1291                     start=pLevState->startON;   /* reset to basic run level */
1292                 }
1293                 if (_prop == DirProp_S)                /* add LRM before S */
1294                 {
1295                     addPoint(pBiDi, start0, LRM_BEFORE);
1296                     pInsertPoints->confirmed=pInsertPoints->size;
1297                 }
1298                 break;
1299             }
1300             /* reset previous RTL cont to level for LTR text */
1301             for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1302             {
1303                 /* reset odd level, leave runLevel+2 as is */
1304                 levels[k]=(levels[k] - 2) & ~1;
1305             }
1306             /* mark insert points as confirmed */
1307             pInsertPoints->confirmed=pInsertPoints->size;
1308             pLevState->lastStrongRTL=-1;
1309             if (_prop == DirProp_S)            /* add LRM before S */
1310             {
1311                 addPoint(pBiDi, start0, LRM_BEFORE);
1312                 pInsertPoints->confirmed=pInsertPoints->size;
1313             }
1314             break;
1315 
1316         case 4:                         /* R/AL after possible relevant EN/AN */
1317             /* just clean up */
1318             pInsertPoints=&(pBiDi->insertPoints);
1319             if (pInsertPoints->capacity > 0)
1320                 /* remove all non confirmed insert points */
1321                 pInsertPoints->size=pInsertPoints->confirmed;
1322             pLevState->startON=-1;
1323             pLevState->startL2EN=-1;
1324             pLevState->lastStrongRTL=limit - 1;
1325             break;
1326 
1327         case 5:                         /* EN/AN after R/AL + possible cont */
1328             /* check for real AN */
1329             if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
1330                 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1331             {
1332                 /* real AN */
1333                 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1334                 {
1335                     /* just note the righmost digit as a strong RTL */
1336                     pLevState->lastStrongRTL=limit - 1;
1337                     break;
1338                 }
1339                 if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1340                 {
1341                     addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1342                     pLevState->startL2EN=-2;
1343                 }
1344                 /* note AN */
1345                 addPoint(pBiDi, start0, LRM_BEFORE);
1346                 break;
1347             }
1348             /* if first EN/AN after R/AL */
1349             if (pLevState->startL2EN == -1) {
1350                 pLevState->startL2EN=start0;
1351             }
1352             break;
1353 
1354         case 6:                         /* note location of latest R/AL */
1355             pLevState->lastStrongRTL=limit - 1;
1356             pLevState->startON=-1;
1357             break;
1358 
1359         case 7:                         /* L after R+ON/EN/AN */
1360             /* include possible adjacent number on the left */
1361             for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1362             if(k>=0) {
1363                 addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1364                 pInsertPoints=&(pBiDi->insertPoints);
1365                 pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1366             }
1367             pLevState->startON=start0;
1368             break;
1369 
1370         case 8:                         /* AN after L */
1371             /* AN numbers between L text on both sides may be trouble. */
1372             /* tentatively bracket with LRMs; will be confirmed if followed by L */
1373             addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
1374             addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
1375             break;
1376 
1377         case 9:                         /* R after L+ON/EN/AN */
1378             /* false alert, infirm LRMs around previous AN */
1379             pInsertPoints=&(pBiDi->insertPoints);
1380             pInsertPoints->size=pInsertPoints->confirmed;
1381             if (_prop == DirProp_S)            /* add RLM before S */
1382             {
1383                 addPoint(pBiDi, start0, RLM_BEFORE);
1384                 pInsertPoints->confirmed=pInsertPoints->size;
1385             }
1386             break;
1387 
1388         case 10:                        /* L after L+ON/AN */
1389             level=pLevState->runLevel + addLevel;
1390             for(k=pLevState->startON; k<start0; k++) {
1391                 if (levels[k]<level)
1392                     levels[k]=level;
1393             }
1394             pInsertPoints=&(pBiDi->insertPoints);
1395             pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
1396             pLevState->startON=start0;
1397             break;
1398 
1399         case 11:                        /* L after L+ON+EN/AN/ON */
1400             level=pLevState->runLevel;
1401             for(k=start0-1; k>=pLevState->startON; k--) {
1402                 if(levels[k]==level+3) {
1403                     while(levels[k]==level+3) {
1404                         levels[k--]-=2;
1405                     }
1406                     while(levels[k]==level) {
1407                         k--;
1408                     }
1409                 }
1410                 if(levels[k]==level+2) {
1411                     levels[k]=level;
1412                     continue;
1413                 }
1414                 levels[k]=level+1;
1415             }
1416             break;
1417 
1418         case 12:                        /* R after L+ON+EN/AN/ON */
1419             level=pLevState->runLevel+1;
1420             for(k=start0-1; k>=pLevState->startON; k--) {
1421                 if(levels[k]>level) {
1422                     levels[k]-=2;
1423                 }
1424             }
1425             break;
1426 
1427         default:                        /* we should never get here */
1428             U_ASSERT(FALSE);
1429             break;
1430         }
1431     }
1432     if((addLevel) || (start < start0)) {
1433         level=pLevState->runLevel + addLevel;
1434         for(k=start; k<limit; k++) {
1435             levels[k]=level;
1436         }
1437     }
1438 }
1439 
1440 static DirProp
lastL_R_AL(UBiDi * pBiDi)1441 lastL_R_AL(UBiDi *pBiDi) {
1442     /* return last strong char at the end of the prologue */
1443     const UChar *text=pBiDi->prologue;
1444     int32_t length=pBiDi->proLength;
1445     int32_t i;
1446     UChar32 uchar;
1447     DirProp dirProp;
1448     for(i=length; i>0; ) {
1449         /* i is decremented by U16_PREV */
1450         U16_PREV(text, 0, i, uchar);
1451         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
1452         if(dirProp==L) {
1453             return DirProp_L;
1454         }
1455         if(dirProp==R || dirProp==AL) {
1456             return DirProp_R;
1457         }
1458         if(dirProp==B) {
1459             return DirProp_ON;
1460         }
1461     }
1462     return DirProp_ON;
1463 }
1464 
1465 static DirProp
firstL_R_AL_EN_AN(UBiDi * pBiDi)1466 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
1467     /* return first strong char or digit in epilogue */
1468     const UChar *text=pBiDi->epilogue;
1469     int32_t length=pBiDi->epiLength;
1470     int32_t i;
1471     UChar32 uchar;
1472     DirProp dirProp;
1473     for(i=0; i<length; ) {
1474         /* i is incremented by U16_NEXT */
1475         U16_NEXT(text, i, length, uchar);
1476         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
1477         if(dirProp==L) {
1478             return DirProp_L;
1479         }
1480         if(dirProp==R || dirProp==AL) {
1481             return DirProp_R;
1482         }
1483         if(dirProp==EN) {
1484             return DirProp_EN;
1485         }
1486         if(dirProp==AN) {
1487             return DirProp_AN;
1488         }
1489     }
1490     return DirProp_ON;
1491 }
1492 
1493 static void
resolveImplicitLevels(UBiDi * pBiDi,int32_t start,int32_t limit,DirProp sor,DirProp eor)1494 resolveImplicitLevels(UBiDi *pBiDi,
1495                       int32_t start, int32_t limit,
1496                       DirProp sor, DirProp eor) {
1497     const DirProp *dirProps=pBiDi->dirProps;
1498 
1499     LevState levState;
1500     int32_t i, start1, start2;
1501     uint8_t oldStateImp, stateImp, actionImp;
1502     uint8_t gprop, resProp, cell;
1503     UBool inverseRTL;
1504     DirProp nextStrongProp=R;
1505     int32_t nextStrongPos=-1;
1506 
1507     levState.startON = -1;  /* silence gcc flow analysis */
1508 
1509     /* check for RTL inverse BiDi mode */
1510     /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
1511      * loop on the text characters from end to start.
1512      * This would need a different properties state table (at least different
1513      * actions) and different levels state tables (maybe very similar to the
1514      * LTR corresponding ones.
1515      */
1516     inverseRTL=(UBool)
1517         ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
1518          (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
1519           pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
1520     /* initialize for levels state table */
1521     levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1522     levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1523     levState.state=0;
1524     levState.runLevel=pBiDi->levels[start];
1525     levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
1526     levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
1527     if(start==0 && pBiDi->proLength>0) {
1528         DirProp lastStrong=lastL_R_AL(pBiDi);
1529         if(lastStrong!=DirProp_ON) {
1530             sor=lastStrong;
1531         }
1532     }
1533     processPropertySeq(pBiDi, &levState, sor, start, start);
1534     /* initialize for property state table */
1535     if(NO_CONTEXT_RTL(dirProps[start])==NSM) {
1536         stateImp = 1 + sor;
1537     } else {
1538         stateImp=0;
1539     }
1540     start1=start;
1541     start2=start;
1542 
1543     for(i=start; i<=limit; i++) {
1544         if(i>=limit) {
1545             gprop=eor;
1546         } else {
1547             DirProp prop, prop1;
1548             prop=NO_CONTEXT_RTL(dirProps[i]);
1549             if(inverseRTL) {
1550                 if(prop==AL) {
1551                     /* AL before EN does not make it AN */
1552                     prop=R;
1553                 } else if(prop==EN) {
1554                     if(nextStrongPos<=i) {
1555                         /* look for next strong char (L/R/AL) */
1556                         int32_t j;
1557                         nextStrongProp=R;   /* set default */
1558                         nextStrongPos=limit;
1559                         for(j=i+1; j<limit; j++) {
1560                             prop1=NO_CONTEXT_RTL(dirProps[j]);
1561                             if(prop1==L || prop1==R || prop1==AL) {
1562                                 nextStrongProp=prop1;
1563                                 nextStrongPos=j;
1564                                 break;
1565                             }
1566                         }
1567                     }
1568                     if(nextStrongProp==AL) {
1569                         prop=AN;
1570                     }
1571                 }
1572             }
1573             gprop=groupProp[prop];
1574         }
1575         oldStateImp=stateImp;
1576         cell=impTabProps[oldStateImp][gprop];
1577         stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
1578         actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
1579         if((i==limit) && (actionImp==0)) {
1580             /* there is an unprocessed sequence if its property == eor   */
1581             actionImp=1;                    /* process the last sequence */
1582         }
1583         if(actionImp) {
1584             resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
1585             switch(actionImp) {
1586             case 1:             /* process current seq1, init new seq1 */
1587                 processPropertySeq(pBiDi, &levState, resProp, start1, i);
1588                 start1=i;
1589                 break;
1590             case 2:             /* init new seq2 */
1591                 start2=i;
1592                 break;
1593             case 3:             /* process seq1, process seq2, init new seq1 */
1594                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1595                 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
1596                 start1=i;
1597                 break;
1598             case 4:             /* process seq1, set seq1=seq2, init new seq2 */
1599                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1600                 start1=start2;
1601                 start2=i;
1602                 break;
1603             default:            /* we should never get here */
1604                 U_ASSERT(FALSE);
1605                 break;
1606             }
1607         }
1608     }
1609     /* flush possible pending sequence, e.g. ON */
1610     if(limit==pBiDi->length && pBiDi->epiLength>0) {
1611         DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
1612         if(firstStrong!=DirProp_ON) {
1613             eor=firstStrong;
1614         }
1615     }
1616     processPropertySeq(pBiDi, &levState, eor, limit, limit);
1617 }
1618 
1619 /* perform (L1) and (X9) ---------------------------------------------------- */
1620 
1621 /*
1622  * Reset the embedding levels for some non-graphic characters (L1).
1623  * This function also sets appropriate levels for BN, and
1624  * explicit embedding types that are supposed to have been removed
1625  * from the paragraph in (X9).
1626  */
1627 static void
adjustWSLevels(UBiDi * pBiDi)1628 adjustWSLevels(UBiDi *pBiDi) {
1629     const DirProp *dirProps=pBiDi->dirProps;
1630     UBiDiLevel *levels=pBiDi->levels;
1631     int32_t i;
1632 
1633     if(pBiDi->flags&MASK_WS) {
1634         UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
1635         Flags flag;
1636 
1637         i=pBiDi->trailingWSStart;
1638         while(i>0) {
1639             /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1640             while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) {
1641                 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1642                     levels[i]=0;
1643                 } else {
1644                     levels[i]=GET_PARALEVEL(pBiDi, i);
1645                 }
1646             }
1647 
1648             /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1649             /* here, i+1 is guaranteed to be <length */
1650             while(i>0) {
1651                 flag=DIRPROP_FLAG_NC(dirProps[--i]);
1652                 if(flag&MASK_BN_EXPLICIT) {
1653                     levels[i]=levels[i+1];
1654                 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1655                     levels[i]=0;
1656                     break;
1657                 } else if(flag&MASK_B_S) {
1658                     levels[i]=GET_PARALEVEL(pBiDi, i);
1659                     break;
1660                 }
1661             }
1662         }
1663     }
1664 }
1665 
1666 U_DRAFT void U_EXPORT2
ubidi_setContext(UBiDi * pBiDi,const UChar * prologue,int32_t proLength,const UChar * epilogue,int32_t epiLength,UErrorCode * pErrorCode)1667 ubidi_setContext(UBiDi *pBiDi,
1668                  const UChar *prologue, int32_t proLength,
1669                  const UChar *epilogue, int32_t epiLength,
1670                  UErrorCode *pErrorCode) {
1671     /* check the argument values */
1672     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1673     if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
1674        (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
1675         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1676         return;
1677     }
1678 
1679     if(proLength==-1) {
1680         pBiDi->proLength=u_strlen(prologue);
1681     } else {
1682         pBiDi->proLength=proLength;
1683     }
1684     if(epiLength==-1) {
1685         pBiDi->epiLength=u_strlen(epilogue);
1686     } else {
1687         pBiDi->epiLength=epiLength;
1688     }
1689     pBiDi->prologue=prologue;
1690     pBiDi->epilogue=epilogue;
1691 }
1692 
1693 static void
setParaSuccess(UBiDi * pBiDi)1694 setParaSuccess(UBiDi *pBiDi) {
1695     pBiDi->proLength=0;                 /* forget the last context */
1696     pBiDi->epiLength=0;
1697     pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
1698 }
1699 
1700 #define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
1701 #define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
1702 static void
setParaRunsOnly(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UErrorCode * pErrorCode)1703 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
1704                 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
1705     void *runsOnlyMemory;
1706     int32_t *visualMap;
1707     UChar *visualText;
1708     int32_t saveLength, saveTrailingWSStart;
1709     const UBiDiLevel *levels;
1710     UBiDiLevel *saveLevels;
1711     UBiDiDirection saveDirection;
1712     UBool saveMayAllocateText;
1713     Run *runs;
1714     int32_t visualLength, i, j, visualStart, logicalStart,
1715             runCount, runLength, addedRuns, insertRemove,
1716             start, limit, step, indexOddBit, logicalPos,
1717             index0, index1;
1718     uint32_t saveOptions;
1719 
1720     pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
1721     if(length==0) {
1722         ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1723         goto cleanup3;
1724     }
1725     /* obtain memory for mapping table and visual text */
1726     runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
1727     if(runsOnlyMemory==NULL) {
1728         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1729         goto cleanup3;
1730     }
1731     visualMap=runsOnlyMemory;
1732     visualText=(UChar *)&visualMap[length];
1733     saveLevels=(UBiDiLevel *)&visualText[length];
1734     saveOptions=pBiDi->reorderingOptions;
1735     if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
1736         pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
1737         pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
1738     }
1739     paraLevel&=1;                       /* accept only 0 or 1 */
1740     ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1741     if(U_FAILURE(*pErrorCode)) {
1742         goto cleanup3;
1743     }
1744     /* we cannot access directly pBiDi->levels since it is not yet set if
1745      * direction is not MIXED
1746      */
1747     levels=ubidi_getLevels(pBiDi, pErrorCode);
1748     uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
1749     saveTrailingWSStart=pBiDi->trailingWSStart;
1750     saveLength=pBiDi->length;
1751     saveDirection=pBiDi->direction;
1752 
1753     /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
1754      * the visual map and the dirProps array to drive the second call
1755      * to ubidi_setPara (but must make provision for possible removal of
1756      * BiDi controls.  Alternatively, only use the dirProps array via
1757      * customized classifier callback.
1758      */
1759     visualLength=ubidi_writeReordered(pBiDi, visualText, length,
1760                                       UBIDI_DO_MIRRORING, pErrorCode);
1761     ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
1762     if(U_FAILURE(*pErrorCode)) {
1763         goto cleanup2;
1764     }
1765     pBiDi->reorderingOptions=saveOptions;
1766 
1767     pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
1768     paraLevel^=1;
1769     /* Because what we did with reorderingOptions, visualText may be shorter
1770      * than the original text. But we don't want the levels memory to be
1771      * reallocated shorter than the original length, since we need to restore
1772      * the levels as after the first call to ubidi_setpara() before returning.
1773      * We will force mayAllocateText to FALSE before the second call to
1774      * ubidi_setpara(), and will restore it afterwards.
1775      */
1776     saveMayAllocateText=pBiDi->mayAllocateText;
1777     pBiDi->mayAllocateText=FALSE;
1778     ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
1779     pBiDi->mayAllocateText=saveMayAllocateText;
1780     ubidi_getRuns(pBiDi, pErrorCode);
1781     if(U_FAILURE(*pErrorCode)) {
1782         goto cleanup1;
1783     }
1784     /* check if some runs must be split, count how many splits */
1785     addedRuns=0;
1786     runCount=pBiDi->runCount;
1787     runs=pBiDi->runs;
1788     visualStart=0;
1789     for(i=0; i<runCount; i++, visualStart+=runLength) {
1790         runLength=runs[i].visualLimit-visualStart;
1791         if(runLength<2) {
1792             continue;
1793         }
1794         logicalStart=GET_INDEX(runs[i].logicalStart);
1795         for(j=logicalStart+1; j<logicalStart+runLength; j++) {
1796             index0=visualMap[j];
1797             index1=visualMap[j-1];
1798             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
1799                 addedRuns++;
1800             }
1801         }
1802     }
1803     if(addedRuns) {
1804         if(getRunsMemory(pBiDi, runCount+addedRuns)) {
1805             if(runCount==1) {
1806                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
1807                 pBiDi->runsMemory[0]=runs[0];
1808             }
1809             runs=pBiDi->runs=pBiDi->runsMemory;
1810             pBiDi->runCount+=addedRuns;
1811         } else {
1812             goto cleanup1;
1813         }
1814     }
1815     /* split runs which are not consecutive in source text */
1816     for(i=runCount-1; i>=0; i--) {
1817         runLength= i==0 ? runs[0].visualLimit :
1818                           runs[i].visualLimit-runs[i-1].visualLimit;
1819         logicalStart=runs[i].logicalStart;
1820         indexOddBit=GET_ODD_BIT(logicalStart);
1821         logicalStart=GET_INDEX(logicalStart);
1822         if(runLength<2) {
1823             if(addedRuns) {
1824                 runs[i+addedRuns]=runs[i];
1825             }
1826             logicalPos=visualMap[logicalStart];
1827             runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1828                                             saveLevels[logicalPos]^indexOddBit);
1829             continue;
1830         }
1831         if(indexOddBit) {
1832             start=logicalStart;
1833             limit=logicalStart+runLength-1;
1834             step=1;
1835         } else {
1836             start=logicalStart+runLength-1;
1837             limit=logicalStart;
1838             step=-1;
1839         }
1840         for(j=start; j!=limit; j+=step) {
1841             index0=visualMap[j];
1842             index1=visualMap[j+step];
1843             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
1844                 logicalPos=BIDI_MIN(visualMap[start], index0);
1845                 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1846                                             saveLevels[logicalPos]^indexOddBit);
1847                 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
1848                 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
1849                 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
1850                 runs[i+addedRuns].insertRemove=insertRemove;
1851                 runs[i].insertRemove&=~insertRemove;
1852                 start=j+step;
1853                 addedRuns--;
1854             }
1855         }
1856         if(addedRuns) {
1857             runs[i+addedRuns]=runs[i];
1858         }
1859         logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
1860         runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1861                                             saveLevels[logicalPos]^indexOddBit);
1862     }
1863 
1864   cleanup1:
1865     /* restore initial paraLevel */
1866     pBiDi->paraLevel^=1;
1867   cleanup2:
1868     /* restore real text */
1869     pBiDi->text=text;
1870     pBiDi->length=saveLength;
1871     pBiDi->originalLength=length;
1872     pBiDi->direction=saveDirection;
1873     /* the saved levels should never excess levelsSize, but we check anyway */
1874     if(saveLength>pBiDi->levelsSize) {
1875         saveLength=pBiDi->levelsSize;
1876     }
1877     uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
1878     pBiDi->trailingWSStart=saveTrailingWSStart;
1879     /* free memory for mapping table and visual text */
1880     uprv_free(runsOnlyMemory);
1881     if(pBiDi->runCount>1) {
1882         pBiDi->direction=UBIDI_MIXED;
1883     }
1884   cleanup3:
1885     pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
1886 }
1887 
1888 /* ubidi_setPara ------------------------------------------------------------ */
1889 
1890 U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * pErrorCode)1891 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1892               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1893               UErrorCode *pErrorCode) {
1894     UBiDiDirection direction;
1895 
1896     /* check the argument values */
1897     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1898     if(pBiDi==NULL || text==NULL || length<-1 ||
1899        (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
1900         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1901         return;
1902     }
1903 
1904     if(length==-1) {
1905         length=u_strlen(text);
1906     }
1907 
1908     /* special treatment for RUNS_ONLY mode */
1909     if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
1910         setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
1911         return;
1912     }
1913 
1914     /* initialize the UBiDi structure */
1915     pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
1916     pBiDi->text=text;
1917     pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
1918     pBiDi->paraLevel=paraLevel;
1919     pBiDi->direction=UBIDI_LTR;
1920     pBiDi->paraCount=1;
1921 
1922     pBiDi->dirProps=NULL;
1923     pBiDi->levels=NULL;
1924     pBiDi->runs=NULL;
1925     pBiDi->insertPoints.size=0;         /* clean up from last call */
1926     pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
1927 
1928     /*
1929      * Save the original paraLevel if contextual; otherwise, set to 0.
1930      */
1931     if(IS_DEFAULT_LEVEL(paraLevel)) {
1932         pBiDi->defaultParaLevel=paraLevel;
1933     } else {
1934         pBiDi->defaultParaLevel=0;
1935     }
1936 
1937     if(length==0) {
1938         /*
1939          * For an empty paragraph, create a UBiDi object with the paraLevel and
1940          * the flags and the direction set but without allocating zero-length arrays.
1941          * There is nothing more to do.
1942          */
1943         if(IS_DEFAULT_LEVEL(paraLevel)) {
1944             pBiDi->paraLevel&=1;
1945             pBiDi->defaultParaLevel=0;
1946         }
1947         if(paraLevel&1) {
1948             pBiDi->flags=DIRPROP_FLAG(R);
1949             pBiDi->direction=UBIDI_RTL;
1950         } else {
1951             pBiDi->flags=DIRPROP_FLAG(L);
1952             pBiDi->direction=UBIDI_LTR;
1953         }
1954 
1955         pBiDi->runCount=0;
1956         pBiDi->paraCount=0;
1957         setParaSuccess(pBiDi);          /* mark successful setPara */
1958         return;
1959     }
1960 
1961     pBiDi->runCount=-1;
1962 
1963     /*
1964      * Get the directional properties,
1965      * the flags bit-set, and
1966      * determine the paragraph level if necessary.
1967      */
1968     if(getDirPropsMemory(pBiDi, length)) {
1969         pBiDi->dirProps=pBiDi->dirPropsMemory;
1970         getDirProps(pBiDi);
1971     } else {
1972         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1973         return;
1974     }
1975     /* the processed length may have changed if UBIDI_OPTION_STREAMING */
1976     length= pBiDi->length;
1977     pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
1978     /* allocate paras memory */
1979     if(pBiDi->paraCount>1) {
1980         if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) {
1981             pBiDi->paras=pBiDi->parasMemory;
1982             pBiDi->paras[pBiDi->paraCount-1]=length;
1983         } else {
1984             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1985             return;
1986         }
1987     } else {
1988         /* initialize paras for single paragraph */
1989         pBiDi->paras=pBiDi->simpleParas;
1990         pBiDi->simpleParas[0]=length;
1991     }
1992 
1993     /* are explicit levels specified? */
1994     if(embeddingLevels==NULL) {
1995         /* no: determine explicit levels according to the (Xn) rules */\
1996         if(getLevelsMemory(pBiDi, length)) {
1997             pBiDi->levels=pBiDi->levelsMemory;
1998             direction=resolveExplicitLevels(pBiDi);
1999         } else {
2000             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2001             return;
2002         }
2003     } else {
2004         /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2005         pBiDi->levels=embeddingLevels;
2006         direction=checkExplicitLevels(pBiDi, pErrorCode);
2007         if(U_FAILURE(*pErrorCode)) {
2008             return;
2009         }
2010     }
2011 
2012     /*
2013      * The steps after (X9) in the UBiDi algorithm are performed only if
2014      * the paragraph text has mixed directionality!
2015      */
2016     pBiDi->direction=direction;
2017     switch(direction) {
2018     case UBIDI_LTR:
2019         /* make sure paraLevel is even */
2020         pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
2021 
2022         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2023         pBiDi->trailingWSStart=0;
2024         break;
2025     case UBIDI_RTL:
2026         /* make sure paraLevel is odd */
2027         pBiDi->paraLevel|=1;
2028 
2029         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2030         pBiDi->trailingWSStart=0;
2031         break;
2032     default:
2033         /*
2034          *  Choose the right implicit state table
2035          */
2036         switch(pBiDi->reorderingMode) {
2037         case UBIDI_REORDER_DEFAULT:
2038             pBiDi->pImpTabPair=&impTab_DEFAULT;
2039             break;
2040         case UBIDI_REORDER_NUMBERS_SPECIAL:
2041             pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2042             break;
2043         case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2044             pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2045             break;
2046         case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2047             pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2048             break;
2049         case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2050             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2051                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2052             } else {
2053                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2054             }
2055             break;
2056         case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2057             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2058                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2059             } else {
2060                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2061             }
2062             break;
2063         default:
2064             /* we should never get here */
2065             U_ASSERT(FALSE);
2066             break;
2067         }
2068         /*
2069          * If there are no external levels specified and there
2070          * are no significant explicit level codes in the text,
2071          * then we can treat the entire paragraph as one run.
2072          * Otherwise, we need to perform the following rules on runs of
2073          * the text with the same embedding levels. (X10)
2074          * "Significant" explicit level codes are ones that actually
2075          * affect non-BN characters.
2076          * Examples for "insignificant" ones are empty embeddings
2077          * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2078          */
2079         if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2080                                    !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2081             resolveImplicitLevels(pBiDi, 0, length,
2082                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2083                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2084         } else {
2085             /* sor, eor: start and end types of same-level-run */
2086             UBiDiLevel *levels=pBiDi->levels;
2087             int32_t start, limit=0;
2088             UBiDiLevel level, nextLevel;
2089             DirProp sor, eor;
2090 
2091             /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2092             level=GET_PARALEVEL(pBiDi, 0);
2093             nextLevel=levels[0];
2094             if(level<nextLevel) {
2095                 eor=GET_LR_FROM_LEVEL(nextLevel);
2096             } else {
2097                 eor=GET_LR_FROM_LEVEL(level);
2098             }
2099 
2100             do {
2101                 /* determine start and limit of the run (end points just behind the run) */
2102 
2103                 /* the values for this run's start are the same as for the previous run's end */
2104                 start=limit;
2105                 level=nextLevel;
2106                 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) {
2107                     /* except if this is a new paragraph, then set sor = para level */
2108                     sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2109                 } else {
2110                     sor=eor;
2111                 }
2112 
2113                 /* search for the limit of this run */
2114                 while(++limit<length && levels[limit]==level) {}
2115 
2116                 /* get the correct level of the next run */
2117                 if(limit<length) {
2118                     nextLevel=levels[limit];
2119                 } else {
2120                     nextLevel=GET_PARALEVEL(pBiDi, length-1);
2121                 }
2122 
2123                 /* determine eor from max(level, nextLevel); sor is last run's eor */
2124                 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
2125                     eor=GET_LR_FROM_LEVEL(nextLevel);
2126                 } else {
2127                     eor=GET_LR_FROM_LEVEL(level);
2128                 }
2129 
2130                 /* if the run consists of overridden directional types, then there
2131                    are no implicit types to be resolved */
2132                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2133                     resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2134                 } else {
2135                     /* remove the UBIDI_LEVEL_OVERRIDE flags */
2136                     do {
2137                         levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2138                     } while(start<limit);
2139                 }
2140             } while(limit<length);
2141         }
2142         /* check if we got any memory shortage while adding insert points */
2143         if (U_FAILURE(pBiDi->insertPoints.errorCode))
2144         {
2145             *pErrorCode=pBiDi->insertPoints.errorCode;
2146             return;
2147         }
2148         /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2149         adjustWSLevels(pBiDi);
2150         break;
2151     }
2152     /* add RLM for inverse Bidi with contextual orientation resolving
2153      * to RTL which would not round-trip otherwise
2154      */
2155     if((pBiDi->defaultParaLevel>0) &&
2156        (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2157        ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2158         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2159         int32_t i, j, start, last;
2160         DirProp dirProp;
2161         for(i=0; i<pBiDi->paraCount; i++) {
2162             last=pBiDi->paras[i]-1;
2163             if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) {
2164                 continue;           /* LTR paragraph */
2165             }
2166             start= i==0 ? 0 : pBiDi->paras[i - 1];
2167             for(j=last; j>=start; j--) {
2168                 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]);
2169                 if(dirProp==L) {
2170                     if(j<last) {
2171                         while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) {
2172                             last--;
2173                         }
2174                     }
2175                     addPoint(pBiDi, last, RLM_BEFORE);
2176                     break;
2177                 }
2178                 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2179                     break;
2180                 }
2181             }
2182         }
2183     }
2184 
2185     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2186         pBiDi->resultLength -= pBiDi->controlCount;
2187     } else {
2188         pBiDi->resultLength += pBiDi->insertPoints.size;
2189     }
2190     setParaSuccess(pBiDi);              /* mark successful setPara */
2191 }
2192 
2193 U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi * pBiDi,UBool orderParagraphsLTR)2194 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2195     if(pBiDi!=NULL) {
2196         pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2197     }
2198 }
2199 
2200 U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi * pBiDi)2201 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2202     if(pBiDi!=NULL) {
2203         return pBiDi->orderParagraphsLTR;
2204     } else {
2205         return FALSE;
2206     }
2207 }
2208 
2209 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi * pBiDi)2210 ubidi_getDirection(const UBiDi *pBiDi) {
2211     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2212         return pBiDi->direction;
2213     } else {
2214         return UBIDI_LTR;
2215     }
2216 }
2217 
2218 U_CAPI const UChar * U_EXPORT2
ubidi_getText(const UBiDi * pBiDi)2219 ubidi_getText(const UBiDi *pBiDi) {
2220     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2221         return pBiDi->text;
2222     } else {
2223         return NULL;
2224     }
2225 }
2226 
2227 U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi * pBiDi)2228 ubidi_getLength(const UBiDi *pBiDi) {
2229     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2230         return pBiDi->originalLength;
2231     } else {
2232         return 0;
2233     }
2234 }
2235 
2236 U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi * pBiDi)2237 ubidi_getProcessedLength(const UBiDi *pBiDi) {
2238     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2239         return pBiDi->length;
2240     } else {
2241         return 0;
2242     }
2243 }
2244 
2245 U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi * pBiDi)2246 ubidi_getResultLength(const UBiDi *pBiDi) {
2247     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2248         return pBiDi->resultLength;
2249     } else {
2250         return 0;
2251     }
2252 }
2253 
2254 /* paragraphs API functions ------------------------------------------------- */
2255 
2256 U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi * pBiDi)2257 ubidi_getParaLevel(const UBiDi *pBiDi) {
2258     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2259         return pBiDi->paraLevel;
2260     } else {
2261         return 0;
2262     }
2263 }
2264 
2265 U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi * pBiDi)2266 ubidi_countParagraphs(UBiDi *pBiDi) {
2267     if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2268         return 0;
2269     } else {
2270         return pBiDi->paraCount;
2271     }
2272 }
2273 
2274 U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi * pBiDi,int32_t paraIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2275 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2276                           int32_t *pParaStart, int32_t *pParaLimit,
2277                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2278     int32_t paraStart;
2279 
2280     /* check the argument values */
2281     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2282     RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2283     RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2284 
2285     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2286     if(paraIndex) {
2287         paraStart=pBiDi->paras[paraIndex-1];
2288     } else {
2289         paraStart=0;
2290     }
2291     if(pParaStart!=NULL) {
2292         *pParaStart=paraStart;
2293     }
2294     if(pParaLimit!=NULL) {
2295         *pParaLimit=pBiDi->paras[paraIndex];
2296     }
2297     if(pParaLevel!=NULL) {
2298         *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2299     }
2300 }
2301 
2302 U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi * pBiDi,int32_t charIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2303 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2304                           int32_t *pParaStart, int32_t *pParaLimit,
2305                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2306     uint32_t paraIndex;
2307 
2308     /* check the argument values */
2309     /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2310     RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2311     RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2312     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2313     RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2314 
2315     for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++);
2316     ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2317     return paraIndex;
2318 }
2319 
2320 U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi * pBiDi,UBiDiClassCallback * newFn,const void * newContext,UBiDiClassCallback ** oldFn,const void ** oldContext,UErrorCode * pErrorCode)2321 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2322                        const void *newContext, UBiDiClassCallback **oldFn,
2323                        const void **oldContext, UErrorCode *pErrorCode)
2324 {
2325     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2326     if(pBiDi==NULL) {
2327         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2328         return;
2329     }
2330     if( oldFn )
2331     {
2332         *oldFn = pBiDi->fnClassCallback;
2333     }
2334     if( oldContext )
2335     {
2336         *oldContext = pBiDi->coClassCallback;
2337     }
2338     pBiDi->fnClassCallback = newFn;
2339     pBiDi->coClassCallback = newContext;
2340 }
2341 
2342 U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi * pBiDi,UBiDiClassCallback ** fn,const void ** context)2343 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2344 {
2345     if(pBiDi==NULL) {
2346         return;
2347     }
2348     if( fn )
2349     {
2350         *fn = pBiDi->fnClassCallback;
2351     }
2352     if( context )
2353     {
2354         *context = pBiDi->coClassCallback;
2355     }
2356 }
2357 
2358 U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi * pBiDi,UChar32 c)2359 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
2360 {
2361     UCharDirection dir;
2362 
2363     if( pBiDi->fnClassCallback == NULL ||
2364         (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
2365     {
2366         return ubidi_getClass(pBiDi->bdp, c);
2367     } else {
2368         return dir;
2369     }
2370 }
2371 
2372