1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1999-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 * file name: ubidi.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999jul27
14 * created by: Markus W. Scherer, updated by Matitiahu Allouche
15 */
16
17 #include "cmemory.h"
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ubidi.h"
22 #include "ubidi_props.h"
23 #include "ubidiimp.h"
24 #include "uassert.h"
25
26 /*
27 * General implementation notes:
28 *
29 * Throughout the implementation, there are comments like (W2) that refer to
30 * rules of the BiDi algorithm in its version 5, in this example to the second
31 * rule of the resolution of weak types.
32 *
33 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
34 * character according to UTF-16, the second UChar gets the directional property of
35 * the entire character assigned, while the first one gets a BN, a boundary
36 * neutral, type, which is ignored by most of the algorithm according to
37 * rule (X9) and the implementation suggestions of the BiDi algorithm.
38 *
39 * Later, adjustWSLevels() will set the level for each BN to that of the
40 * following character (UChar), which results in surrogate pairs getting the
41 * same level on each of their surrogates.
42 *
43 * In a UTF-8 implementation, the same thing could be done: the last byte of
44 * a multi-byte sequence would get the "real" property, while all previous
45 * bytes of that sequence would get BN.
46 *
47 * It is not possible to assign all those parts of a character the same real
48 * property because this would fail in the resolution of weak types with rules
49 * that look at immediately surrounding types.
50 *
51 * As a related topic, this implementation does not remove Boundary Neutral
52 * types from the input, but ignores them wherever this is relevant.
53 * For example, the loop for the resolution of the weak types reads
54 * types until it finds a non-BN.
55 * Also, explicit embedding codes are neither changed into BN nor removed.
56 * They are only treated the same way real BNs are.
57 * As stated before, adjustWSLevels() takes care of them at the end.
58 * For the purpose of conformance, the levels of all these codes
59 * do not matter.
60 *
61 * Note that this implementation never modifies the dirProps
62 * after the initial setup.
63 *
64 *
65 * In this implementation, the resolution of weak types (Wn),
66 * neutrals (Nn), and the assignment of the resolved level (In)
67 * are all done in one single loop, in resolveImplicitLevels().
68 * Changes of dirProp values are done on the fly, without writing
69 * them back to the dirProps array.
70 *
71 *
 * This implementation contains code that allows bypassing steps of the
 * algorithm that are not needed for the specific paragraph,
 * in order to speed up the most common cases considerably,
 * like text that is entirely LTR, or RTL text without numbers.
76 *
77 * Most of this is done by setting a bit for each directional property
78 * in a flags variable and later checking for whether there are
79 * any LTR characters or any RTL characters, or both, whether
80 * there are any explicit embedding codes, etc.
81 *
82 * If the (Xn) steps are performed, then the flags are re-evaluated,
83 * because they will then not contain the embedding codes any more
84 * and will be adjusted for override codes, so that subsequently
85 * more bypassing may be possible than what the initial flags suggested.
86 *
87 * If the text is not mixed-directional, then the
88 * algorithm steps for the weak type resolution are not performed,
89 * and all levels are set to the paragraph level.
90 *
91 * If there are no explicit embedding codes, then the (Xn) steps
92 * are not performed.
93 *
94 * If embedding levels are supplied as a parameter, then all
95 * explicit embedding codes are ignored, and the (Xn) steps
96 * are not performed.
97 *
 * White Space types could get the level of the run they belong to,
 * so a test of (flags&MASK_EMBEDDING) is used to decide whether
 * the paragraph direction must also be reflected in
 * the flags variable.
102 *
103 * If there are no White Space types in the paragraph, then
104 * (L1) is not necessary in adjustWSLevels().
105 */
106
107 /* to avoid some conditional statements, use tiny constant arrays */
108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
111
112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
113 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
114 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
115
116 /* UBiDi object management -------------------------------------------------- */
117
118 U_CAPI UBiDi * U_EXPORT2
ubidi_open(void)119 ubidi_open(void)
120 {
121 UErrorCode errorCode=U_ZERO_ERROR;
122 return ubidi_openSized(0, 0, &errorCode);
123 }
124
125 U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
127 UBiDi *pBiDi;
128
129 /* check the argument values */
130 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
131 return NULL;
132 } else if(maxLength<0 || maxRunCount<0) {
133 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
134 return NULL; /* invalid arguments */
135 }
136
137 /* allocate memory for the object */
138 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
139 if(pBiDi==NULL) {
140 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
141 return NULL;
142 }
143
144 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
145 uprv_memset(pBiDi, 0, sizeof(UBiDi));
146
147 /* get BiDi properties */
148 pBiDi->bdp=ubidi_getSingleton(pErrorCode);
149 if(U_FAILURE(*pErrorCode)) {
150 uprv_free(pBiDi);
151 return NULL;
152 }
153
154 /* allocate memory for arrays as requested */
155 if(maxLength>0) {
156 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
157 !getInitialLevelsMemory(pBiDi, maxLength)
158 ) {
159 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
160 }
161 } else {
162 pBiDi->mayAllocateText=TRUE;
163 }
164
165 if(maxRunCount>0) {
166 if(maxRunCount==1) {
167 /* use simpleRuns[] */
168 pBiDi->runsSize=sizeof(Run);
169 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
170 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
171 }
172 } else {
173 pBiDi->mayAllocateRuns=TRUE;
174 }
175
176 if(U_SUCCESS(*pErrorCode)) {
177 return pBiDi;
178 } else {
179 ubidi_close(pBiDi);
180 return NULL;
181 }
182 }
183
184 /*
185 * We are allowed to allocate memory if memory==NULL or
186 * mayAllocate==TRUE for each array that we need.
187 * We also try to grow and shrink memory as needed if we
188 * allocate it.
189 *
190 * Assume sizeNeeded>0.
191 * If *pMemory!=NULL, then assume *pSize>0.
192 *
193 * ### this realloc() may unnecessarily copy the old data,
194 * which we know we don't need any more;
195 * is this the best way to do this??
196 */
197 U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation * bidiMem,int32_t * pSize,UBool mayAllocate,int32_t sizeNeeded)198 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
199 void **pMemory = (void **)bidiMem;
200 /* check for existing memory */
201 if(*pMemory==NULL) {
202 /* we need to allocate memory */
203 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
204 *pSize=sizeNeeded;
205 return TRUE;
206 } else {
207 return FALSE;
208 }
209 } else {
210 /* there is some memory, is it enough or too much? */
211 if(sizeNeeded>*pSize && !mayAllocate) {
212 /* not enough memory, and we must not allocate */
213 return FALSE;
214 } else if(sizeNeeded!=*pSize && mayAllocate) {
215 /* FOOD FOR THOUGHT: in hope to improve performance, we should
216 * try never shrinking memory, only growing it when required.
217 */
218 /* we may try to grow or shrink */
219 void *memory;
220
221 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
222 *pMemory=memory;
223 *pSize=sizeNeeded;
224 return TRUE;
225 } else {
226 /* we failed to grow */
227 return FALSE;
228 }
229 } else {
230 /* we have at least enough memory and must not allocate */
231 return TRUE;
232 }
233 }
234 }
235
236 U_CAPI void U_EXPORT2
ubidi_close(UBiDi * pBiDi)237 ubidi_close(UBiDi *pBiDi) {
238 if(pBiDi!=NULL) {
239 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
240 if(pBiDi->dirPropsMemory!=NULL) {
241 uprv_free(pBiDi->dirPropsMemory);
242 }
243 if(pBiDi->levelsMemory!=NULL) {
244 uprv_free(pBiDi->levelsMemory);
245 }
246 if(pBiDi->runsMemory!=NULL) {
247 uprv_free(pBiDi->runsMemory);
248 }
249 if(pBiDi->parasMemory!=NULL) {
250 uprv_free(pBiDi->parasMemory);
251 }
252 if(pBiDi->insertPoints.points!=NULL) {
253 uprv_free(pBiDi->insertPoints.points);
254 }
255
256 uprv_free(pBiDi);
257 }
258 }
259
260 /* set to approximate "inverse BiDi" ---------------------------------------- */
261
262 U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi * pBiDi,UBool isInverse)263 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
264 if(pBiDi!=NULL) {
265 pBiDi->isInverse=isInverse;
266 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
267 : UBIDI_REORDER_DEFAULT;
268 }
269 }
270
271 U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi * pBiDi)272 ubidi_isInverse(UBiDi *pBiDi) {
273 if(pBiDi!=NULL) {
274 return pBiDi->isInverse;
275 } else {
276 return FALSE;
277 }
278 }
279
280 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
281 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
282 * concept of RUNS_ONLY which is a double operation.
283 * It could be advantageous to divide this into 3 concepts:
284 * a) Operation: direct / inverse / RUNS_ONLY
285 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
286 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
287 * This would allow combinations not possible today like RUNS_ONLY with
288 * NUMBERS_SPECIAL.
289 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
290 * REMOVE_CONTROLS for the inverse step.
291 * Not all combinations would be supported, and probably not all do make sense.
292 * This would need to document which ones are supported and what are the
293 * fallbacks for unsupported combinations.
294 */
295 U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi * pBiDi,UBiDiReorderingMode reorderingMode)296 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
297 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
298 && (reorderingMode < UBIDI_REORDER_COUNT)) {
299 pBiDi->reorderingMode = reorderingMode;
300 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
301 }
302 }
303
304 U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi * pBiDi)305 ubidi_getReorderingMode(UBiDi *pBiDi) {
306 if (pBiDi!=NULL) {
307 return pBiDi->reorderingMode;
308 } else {
309 return UBIDI_REORDER_DEFAULT;
310 }
311 }
312
313 U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi * pBiDi,uint32_t reorderingOptions)314 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
315 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
316 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
317 }
318 if (pBiDi!=NULL) {
319 pBiDi->reorderingOptions=reorderingOptions;
320 }
321 }
322
323 U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi * pBiDi)324 ubidi_getReorderingOptions(UBiDi *pBiDi) {
325 if (pBiDi!=NULL) {
326 return pBiDi->reorderingOptions;
327 } else {
328 return 0;
329 }
330 }
331
332 /* perform (P2)..(P3) ------------------------------------------------------- */
333
334 /*
335 * Get the directional properties for the text,
336 * calculate the flags bit-set, and
337 * determine the paragraph level if necessary.
338 */
339 static void
getDirProps(UBiDi * pBiDi)340 getDirProps(UBiDi *pBiDi) {
341 const UChar *text=pBiDi->text;
342 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
343
344 int32_t i=0, i0, i1, length=pBiDi->originalLength;
345 Flags flags=0; /* collect all directionalities in the text */
346 UChar32 uchar;
347 DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */
348 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
349 /* for inverse BiDi, the default para level is set to RTL if there is a
350 strong R or AL character at either end of the text */
351 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
352 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
353 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
354 int32_t lastArabicPos=-1;
355 int32_t controlCount=0;
356 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
357 UBIDI_OPTION_REMOVE_CONTROLS);
358
359 typedef enum {
360 NOT_CONTEXTUAL, /* 0: not contextual paraLevel */
361 LOOKING_FOR_STRONG, /* 1: looking for first strong char */
362 FOUND_STRONG_CHAR /* 2: found first strong char */
363 } State;
364 State state;
365 int32_t paraStart=0; /* index of first char in paragraph */
366 DirProp paraDir; /* == CONTEXT_RTL within paragraphs
367 starting with strong R char */
368 DirProp lastStrongDir=0; /* for default level & inverse BiDi */
369 int32_t lastStrongLTR=0; /* for STREAMING option */
370
371 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
372 pBiDi->length=0;
373 lastStrongLTR=0;
374 }
375 if(isDefaultLevel) {
376 paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0;
377 paraDir=paraDirDefault;
378 lastStrongDir=paraDirDefault;
379 state=LOOKING_FOR_STRONG;
380 } else {
381 state=NOT_CONTEXTUAL;
382 paraDir=0;
383 }
384 /* count paragraphs and determine the paragraph level (P2..P3) */
385 /*
386 * see comment in ubidi.h:
387 * the DEFAULT_XXX values are designed so that
388 * their bit 0 alone yields the intended default
389 */
390 for( /* i=0 above */ ; i<length; ) {
391 /* i is incremented by UTF_NEXT_CHAR */
392 i0=i; /* index of first code unit */
393 UTF_NEXT_CHAR(text, i, length, uchar);
394 i1=i-1; /* index of last code unit, gets the directional property */
395 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
396 dirProps[i1]=dirProp|paraDir;
397 if(i1>i0) { /* set previous code units' properties to BN */
398 flags|=DIRPROP_FLAG(BN);
399 do {
400 dirProps[--i1]=(DirProp)(BN|paraDir);
401 } while(i1>i0);
402 }
403 if(state==LOOKING_FOR_STRONG) {
404 if(dirProp==L) {
405 state=FOUND_STRONG_CHAR;
406 if(paraDir) {
407 paraDir=0;
408 for(i1=paraStart; i1<i; i1++) {
409 dirProps[i1]&=~CONTEXT_RTL;
410 }
411 }
412 continue;
413 }
414 if(dirProp==R || dirProp==AL) {
415 state=FOUND_STRONG_CHAR;
416 if(paraDir==0) {
417 paraDir=CONTEXT_RTL;
418 for(i1=paraStart; i1<i; i1++) {
419 dirProps[i1]|=CONTEXT_RTL;
420 }
421 }
422 continue;
423 }
424 }
425 if(dirProp==L) {
426 lastStrongDir=0;
427 lastStrongLTR=i; /* i is index to next character */
428 }
429 else if(dirProp==R) {
430 lastStrongDir=CONTEXT_RTL;
431 }
432 else if(dirProp==AL) {
433 lastStrongDir=CONTEXT_RTL;
434 lastArabicPos=i-1;
435 }
436 else if(dirProp==B) {
437 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
438 pBiDi->length=i; /* i is index to next character */
439 }
440 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
441 for( ; paraStart<i; paraStart++) {
442 dirProps[paraStart]|=CONTEXT_RTL;
443 }
444 }
445 if(i<length) { /* B not last char in text */
446 if(!((uchar==CR) && (text[i]==LF))) {
447 pBiDi->paraCount++;
448 }
449 if(isDefaultLevel) {
450 state=LOOKING_FOR_STRONG;
451 paraStart=i; /* i is index to next character */
452 paraDir=paraDirDefault;
453 lastStrongDir=paraDirDefault;
454 }
455 }
456 }
457 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) {
458 controlCount++;
459 }
460 }
461 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
462 for(i1=paraStart; i1<length; i1++) {
463 dirProps[i1]|=CONTEXT_RTL;
464 }
465 }
466 if(isDefaultLevel) {
467 pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0);
468 }
469 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
470 if((lastStrongLTR>pBiDi->length) &&
471 (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) {
472 pBiDi->length = lastStrongLTR;
473 }
474 if(pBiDi->length<pBiDi->originalLength) {
475 pBiDi->paraCount--;
476 }
477 }
478 /* The following line does nothing new for contextual paraLevel, but is
479 needed for absolute paraLevel. */
480 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
481
482 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
483 flags|=DIRPROP_FLAG(L);
484 }
485
486 pBiDi->controlCount = controlCount;
487 pBiDi->flags=flags;
488 pBiDi->lastArabicPos=lastArabicPos;
489 }
490
491 /* perform (X1)..(X9) ------------------------------------------------------- */
492
493 /* determine if the text is mixed-directional or single-directional */
494 static UBiDiDirection
directionFromFlags(UBiDi * pBiDi)495 directionFromFlags(UBiDi *pBiDi) {
496 Flags flags=pBiDi->flags;
497 /* if the text contains AN and neutrals, then some neutrals may become RTL */
498 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
499 return UBIDI_LTR;
500 } else if(!(flags&MASK_LTR)) {
501 return UBIDI_RTL;
502 } else {
503 return UBIDI_MIXED;
504 }
505 }
506
507 /*
508 * Resolve the explicit levels as specified by explicit embedding codes.
509 * Recalculate the flags to have them reflect the real properties
510 * after taking the explicit embeddings into account.
511 *
512 * The BiDi algorithm is designed to result in the same behavior whether embedding
513 * levels are externally specified (from "styled text", supposedly the preferred
514 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
515 * That is why (X9) instructs to remove all explicit codes (and BN).
516 * However, in a real implementation, this removal of these codes and their index
517 * positions in the plain text is undesirable since it would result in
518 * reallocated, reindexed text.
519 * Instead, this implementation leaves the codes in there and just ignores them
520 * in the subsequent processing.
521 * In order to get the same reordering behavior, positions with a BN or an
522 * explicit embedding code just get the same level assigned as the last "real"
523 * character.
524 *
525 * Some implementations, not this one, then overwrite some of these
526 * directionality properties at "real" same-level-run boundaries by
527 * L or R codes so that the resolution of weak types can be performed on the
528 * entire paragraph at once instead of having to parse it once more and
529 * perform that resolution on same-level-runs.
530 * This limits the scope of the implicit rules in effectively
531 * the same way as the run limits.
532 *
533 * Instead, this implementation does not modify these codes.
534 * On one hand, the paragraph has to be scanned for same-level-runs, but
535 * on the other hand, this saves another loop to reset these codes,
536 * or saves making and modifying a copy of dirProps[].
537 *
538 *
539 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
540 *
541 *
542 * Handling the stack of explicit levels (Xn):
543 *
544 * With the BiDi stack of explicit levels,
545 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
546 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
547 *
548 * In order to have a correct push-pop semantics even in the case of overflows,
549 * there are two overflow counters:
550 * - countOver60 is incremented with each LRx at level 60
551 * - from level 60, one RLx increases the level to 61
552 * - countOver61 is incremented with each LRx and RLx at level 61
553 *
554 * Popping levels with PDF must work in the opposite order so that level 61
555 * is correct at the correct point. Underflows (too many PDFs) must be checked.
556 *
557 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
558 */
559 static UBiDiDirection
resolveExplicitLevels(UBiDi * pBiDi)560 resolveExplicitLevels(UBiDi *pBiDi) {
561 const DirProp *dirProps=pBiDi->dirProps;
562 UBiDiLevel *levels=pBiDi->levels;
563 const UChar *text=pBiDi->text;
564
565 int32_t i=0, length=pBiDi->length;
566 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
567 DirProp dirProp;
568 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
569
570 UBiDiDirection direction;
571 int32_t paraIndex=0;
572
573 /* determine if the text is mixed-directional or single-directional */
574 direction=directionFromFlags(pBiDi);
575
576 /* we may not need to resolve any explicit levels, but for multiple
577 paragraphs we want to loop on all chars to set the para boundaries */
578 if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) {
579 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
580 } else if((pBiDi->paraCount==1) &&
581 (!(flags&MASK_EXPLICIT) ||
582 (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) {
583 /* mixed, but all characters are at the same embedding level */
584 /* or we are in "inverse BiDi" */
585 /* and we don't have contextual multiple paragraphs with some B char */
586 /* set all levels to the paragraph level */
587 for(i=0; i<length; ++i) {
588 levels[i]=level;
589 }
590 } else {
591 /* continue to perform (Xn) */
592
593 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
594 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
595 UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
596
597 UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
598 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */
599
600 /* recalculate the flags */
601 flags=0;
602
603 for(i=0; i<length; ++i) {
604 dirProp=NO_CONTEXT_RTL(dirProps[i]);
605 switch(dirProp) {
606 case LRE:
607 case LRO:
608 /* (X3, X5) */
609 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
610 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
611 stack[stackTop]=embeddingLevel;
612 ++stackTop;
613 embeddingLevel=newLevel;
614 if(dirProp==LRO) {
615 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
616 }
617 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
618 since this has already been done for newLevel which is
619 the source for embeddingLevel.
620 */
621 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
622 ++countOver61;
623 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
624 ++countOver60;
625 }
626 flags|=DIRPROP_FLAG(BN);
627 break;
628 case RLE:
629 case RLO:
630 /* (X2, X4) */
631 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
632 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
633 stack[stackTop]=embeddingLevel;
634 ++stackTop;
635 embeddingLevel=newLevel;
636 if(dirProp==RLO) {
637 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
638 }
639 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
640 since this has already been done for newLevel which is
641 the source for embeddingLevel.
642 */
643 } else {
644 ++countOver61;
645 }
646 flags|=DIRPROP_FLAG(BN);
647 break;
648 case PDF:
649 /* (X7) */
650 /* handle all the overflow cases first */
651 if(countOver61>0) {
652 --countOver61;
653 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
654 /* handle LRx overflows from level 60 */
655 --countOver60;
656 } else if(stackTop>0) {
657 /* this is the pop operation; it also pops level 61 while countOver60>0 */
658 --stackTop;
659 embeddingLevel=stack[stackTop];
660 /* } else { (underflow) */
661 }
662 flags|=DIRPROP_FLAG(BN);
663 break;
664 case B:
665 stackTop=0;
666 countOver60=countOver61=0;
667 level=GET_PARALEVEL(pBiDi, i);
668 if((i+1)<length) {
669 embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
670 if(!((text[i]==CR) && (text[i+1]==LF))) {
671 pBiDi->paras[paraIndex++]=i+1;
672 }
673 }
674 flags|=DIRPROP_FLAG(B);
675 break;
676 case BN:
677 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
678 /* they will get their levels set correctly in adjustWSLevels() */
679 flags|=DIRPROP_FLAG(BN);
680 break;
681 default:
682 /* all other types get the "real" level */
683 if(level!=embeddingLevel) {
684 level=embeddingLevel;
685 if(level&UBIDI_LEVEL_OVERRIDE) {
686 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
687 } else {
688 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
689 }
690 }
691 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
692 flags|=DIRPROP_FLAG(dirProp);
693 }
694 break;
695 }
696
697 /*
698 * We need to set reasonable levels even on BN codes and
699 * explicit codes because we will later look at same-level runs (X10).
700 */
701 levels[i]=level;
702 }
703 if(flags&MASK_EMBEDDING) {
704 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
705 }
706 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
707 flags|=DIRPROP_FLAG(L);
708 }
709
710 /* subsequently, ignore the explicit codes and BN (X9) */
711
712 /* again, determine if the text is mixed-directional or single-directional */
713 pBiDi->flags=flags;
714 direction=directionFromFlags(pBiDi);
715 }
716
717 return direction;
718 }
719
720 /*
721 * Use a pre-specified embedding levels array:
722 *
723 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
724 * ignore all explicit codes (X9),
725 * and check all the preset levels.
726 *
727 * Recalculate the flags to have them reflect the real properties
728 * after taking the explicit embeddings into account.
729 */
730 static UBiDiDirection
checkExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)731 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
732 const DirProp *dirProps=pBiDi->dirProps;
733 DirProp dirProp;
734 UBiDiLevel *levels=pBiDi->levels;
735 const UChar *text=pBiDi->text;
736
737 int32_t i, length=pBiDi->length;
738 Flags flags=0; /* collect all directionalities in the text */
739 UBiDiLevel level;
740 uint32_t paraIndex=0;
741
742 for(i=0; i<length; ++i) {
743 level=levels[i];
744 dirProp=NO_CONTEXT_RTL(dirProps[i]);
745 if(level&UBIDI_LEVEL_OVERRIDE) {
746 /* keep the override flag in levels[i] but adjust the flags */
747 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
748 flags|=DIRPROP_FLAG_O(level);
749 } else {
750 /* set the flags */
751 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
752 }
753 if((level<GET_PARALEVEL(pBiDi, i) &&
754 !((0==level)&&(dirProp==B))) ||
755 (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
756 /* level out of bounds */
757 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
758 return UBIDI_LTR;
759 }
760 if((dirProp==B) && ((i+1)<length)) {
761 if(!((text[i]==CR) && (text[i+1]==LF))) {
762 pBiDi->paras[paraIndex++]=i+1;
763 }
764 }
765 }
766 if(flags&MASK_EMBEDDING) {
767 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
768 }
769
770 /* determine if the text is mixed-directional or single-directional */
771 pBiDi->flags=flags;
772 return directionFromFlags(pBiDi);
773 }
774
775 /******************************************************************
776 The Properties state machine table
777 *******************************************************************
778
779 All table cells are 8 bits:
780 bits 0..4: next state
781 bits 5..7: action to perform (if > 0)
782
783 Cells may be of format "n" where n represents the next state
784 (except for the rightmost column).
785 Cells may also be of format "s(x,y)" where x represents an action
786 to perform and y represents the next state.
787
788 *******************************************************************
789 Definitions and type for properties state table
790 *******************************************************************
791 */
/* number of columns in impTabProps: 13 property groups plus the Res column */
#define IMPTABPROPS_COLUMNS 14
/* index of the Res column (the last one) */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* unpack a cell: bits 0..4 are the next state, bits 5..7 the action */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/*
 * Pack a cell from an action and a next state.
 * Both arguments are parenthesized so that any expression can be passed
 * without operator-precedence surprises.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))

/*
 * Maps each directional class (in the order of the heading below) to its
 * column group; values 0..6 correspond to the reduced classes of the enum
 * that follows, values 7..12 to the ES, ET, CS, BN, NSM and AL columns
 * of impTabProps.
 */
static const uint8_t groupProp[] =          /* dirProp regrouped */
{
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
};

/*
 * Reduced directional classes.
 * NOTE(review): identifiers starting with an underscore followed by an
 * uppercase letter are reserved by the C standard; the names are kept
 * unchanged here because other code in this file may refer to them.
 */
enum { _L=0, _R=1, _EN=2, _AN=3, _ON=4, _S=5, _B=6 }; /* reduced dirProp */
804
805 /******************************************************************
806
807 PROPERTIES STATE TABLE
808
809 In table impTabProps,
810 - the ON column regroups ON and WS
811 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
812 - the Res column is the reduced property assigned to a run
813
814 Action 1: process current run1, init new run1
815 2: init new run2
816 3: process run1, process run2, init new run1
817 4: process run1, set run1=run2, init new run2
818
819 Notes:
820 1) This table is used in resolveImplicitLevels().
821 2) This table triggers actions when there is a change in the Bidi
822 property of incoming characters (action 1).
       3) Most such property sequences are processed immediately (in
          fact, passed to processPropertySeq()).
825 4) However, numbers are assembled as one sequence. This means
826 that undefined situations (like CS following digits, until
827 it is known if the next char will be a digit) are held until
828 following chars define them.
829 Example: digits followed by CS, then comes another CS or ON;
830 the digits will be processed, then the CS assigned
831 as the start of an ON sequence (action 3).
832 5) There are cases where more than one sequence must be
833 processed, for instance digits followed by CS followed by L:
834 the digits must be processed as one sequence, and the CS
835 must be processed as an ON sequence, all this before starting
836 assembling chars for the opening L sequence.
837
838
839 */
/*
 * Properties state table.
 *
 * resolveImplicitLevels() reads it as impTabProps[state][gprop] and splits
 * the cell with GET_STATEPROPS()/GET_ACTIONPROPS() into the next state and
 * an action.  The actions handled by that caller are:
 *   1: process the current sequence seq1 and start a new seq1
 *   2: start a second sequence seq2
 *   3: process seq1, process seq2 as an ON sequence, start a new seq1
 *   4: process seq1, let seq2 become seq1, start a new seq2
 * A plain number in a cell is a next state with no action.
 *
 * The last column (index IMPTABPROPS_RES) is not a transition: it is the
 * property of the sequence represented by the row's state, and it is the
 * value handed to processPropertySeq() when that sequence is processed.
 *
 * s(action,state) here is the cell-packing macro of THIS table, defined
 * earlier in the file; it is #undef'ed right after the table because the
 * levels tables below pack their cells differently.
 */
840 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
841 {
842 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */
843 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , _ON },
844 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), _L },
845 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), _R },
846 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , _R },
847 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), _EN },
848 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), _AN },
849 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), _AN },
850 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), _ON },
851 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), _ON },
852 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), _ON },
853 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), _EN },
854 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), _EN },
855 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), _AN },
856 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), _AN },
857 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), _ON },
858 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), _S },
859 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), _S },
860 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), _B }
861 };
862
863 /* We must undef macro s because the levels tables have a different
864 * structure (4 bits for action and 4 bits for next state).
865 */
866 #undef s
867
868 /******************************************************************
869 The levels state machine tables
870 *******************************************************************
871
872 All table cells are 8 bits:
873 bits 0..3: next state
874 bits 4..7: action to perform (if > 0)
875
876 Cells may be of format "n" where n represents the next state
877 (except for the rightmost column).
878 Cells may also be of format "s(x,y)" where x represents an action
879 to perform and y represents the next state.
880
881 This format limits each table to 16 states each and to 15 actions.
882
883 *******************************************************************
884 Definitions and type for levels state tables
885 *******************************************************************
886 */
/*
 * Geometry and cell encoding of the levels state tables.
 *
 * IMPTABLEVELS_COLUMNS  number of columns in a row: one per grouped property
 *                       up to and including _B, plus the trailing Res column.
 * IMPTABLEVELS_RES      index of that trailing Res column.
 * GET_STATE(cell)       low nibble of a cell: the next state.
 * GET_ACTION(cell)      high nibble of a cell: the (table-local) action.
 * s(action, newState)   packs an action and a next state into one cell.
 *
 * Every macro argument is parenthesized so that an argument containing a
 * low-precedence operator (e.g. ?: or |) is packed as a whole instead of
 * being split by the surrounding + and << operators.
 */
#define IMPTABLEVELS_COLUMNS (_B + 2)
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
#define GET_STATE(cell) ((cell)&0x0f)
#define GET_ACTION(cell) ((cell)>>4)
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
892
/* A levels state table: any number of rows (states), each with
 * IMPTABLEVELS_COLUMNS cells; the last cell of a row is the Res column.
 */
893 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
/* An action map: processPropertySeq() indexes it with the action nibble of a
 * table cell to obtain the action number it switches on.
 */
894 typedef uint8_t ImpAct[];
895
896 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
897 * instead of having a pair of ImpTab and a pair of ImpAct.
898 */
/* Two levels tables with their two action maps.  resolveImplicitLevels()
 * indexes both arrays with (runLevel & 1), so entry [0] serves even run
 * levels and entry [1] serves odd run levels.  The pointers are stored as
 * const void * and cast back to const ImpTab * / const ImpAct * by that
 * function.
 */
899 typedef struct ImpTabPair {
900 const void * pImpTab[2];
901 const void * pImpAct[2];
902 } ImpTabPair;
903
904 /******************************************************************
905
906 LEVELS STATE TABLES
907
908 In all levels state tables,
909 - state 0 is the initial state
910 - the Res column is the increment to add to the text level
911 for this property sequence.
912
913 The impAct arrays for each table of a pair map the local action
914 numbers of the table to the total list of actions. For instance,
915 action 2 in a given table corresponds to the action number which
916 appears in entry [2] of the impAct array for that table.
917 The first entry of all impAct arrays must be 0.
918
919 Action 1: init conditional sequence
920 2: prepend conditional sequence to current sequence
921 3: set ON sequence to new level - 1
922 4: init EN/AN/ON sequence
923 5: fix EN/AN/ON sequence followed by R
924 6: set previous level sequence to level 2
925
926 Notes:
927 1) These tables are used in processPropertySeq(). The input
928 is property sequences as determined by resolveImplicitLevels.
929 2) Most such property sequences are processed immediately
930 (levels are assigned).
931 3) However, some sequences cannot be assigned a final level till
932 one or more following sequences are received. For instance,
933 ON following an R sequence within an even-level paragraph.
934 If the following sequence is R, the ON sequence will be
935 assigned basic run level+1, and so will the R sequence.
936 4) S is generally handled like ON, since its level will be fixed
937 to paragraph level in adjustWSLevels().
938
939 */
940
/*
 * Levels state tables of the DEFAULT pair.
 * Each cell is either a next state, or s(action, nextState) with a
 * table-local action number that is translated through the pair's ImpAct
 * array.  The Res column is the increment added to the run level for the
 * sequence that leads into the row's state (see processPropertySeq()).
 * NOTE: the table is chosen by the parity of the run level (runLevel & 1)
 * in resolveImplicitLevels(); "paragraph level" in the comments below is
 * the original wording.
 */
941 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
942 /* In this table, conditional sequences receive the higher possible level
943 until proven otherwise.
944 */
945 {
946 /* L , R , EN , AN , ON , S , B , Res */
947 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
948 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
949 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
950 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
951 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 },
952 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 }
953 };
954 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
955 /* In this table, conditional sequences receive the lower possible level
956 until proven otherwise.
957 */
958 {
959 /* L , R , EN , AN , ON , S , B , Res */
960 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
961 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
962 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
963 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
964 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
965 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
966 };
/* Identity action map: table-local action n is action n of
 * processPropertySeq().  Entry [0] is 0, i.e. "no action".
 */
967 static const ImpAct impAct0 = {0,1,2,3,4,5,6};
/* [0] = table/map for even run levels, [1] = table/map for odd run levels. */
968 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
969 &impTabR_DEFAULT},
970 {&impAct0, &impAct0}};
971
/*
 * Levels state table of the NUMBERS_SPECIAL pair for even run levels.
 * Same cell format as the other levels tables: a plain number is the next
 * state, s(action, nextState) adds a table-local action; Res is the level
 * increment of the row's state.
 */
972 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
973 /* In this table, conditional sequences receive the higher possible level
974 until proven otherwise.
975 */
976 {
977 /* L , R , EN , AN , ON , S , B , Res */
978 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 },
979 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 },
980 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 },
981 /* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 },
982 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
983 };
/* Odd run levels reuse impTabR_DEFAULT; both halves use the identity
 * action map impAct0.
 */
984 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
985 &impTabR_DEFAULT},
986 {&impAct0, &impAct0}};
987
/*
 * Levels state tables of the GROUP_NUMBERS_WITH_R pair.
 * The first table is entry [0] of the pair (even run levels), the second is
 * entry [1] (odd run levels).  Only table-local actions 1 and 2 occur, and
 * the pair uses the identity action map impAct0.
 */
988 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
989 /* In this table, EN/AN+ON sequences receive levels as if associated with R
990 until proven that there is L or sor/eor on both sides. AN is handled like EN.
991 */
992 {
993 /* L , R , EN , AN , ON , S , B , Res */
994 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
995 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
996 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
997 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
998 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
999 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
1000 };
1001 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1002 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1003 until proven that there is L on both sides. AN is handled like EN.
1004 */
1005 {
1006 /* L , R , EN , AN , ON , S , B , Res */
1007 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1008 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1009 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1010 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1011 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
1012 };
/* [0] = even run levels, [1] = odd run levels. */
1013 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1014 {&impTabL_GROUP_NUMBERS_WITH_R,
1015 &impTabR_GROUP_NUMBERS_WITH_R},
1016 {&impAct0, &impAct0}};
1017
1018
/*
 * Levels state tables of the INVERSE_NUMBERS_AS_L pair.
 * In both tables the EN and AN columns hold the same cell as the L column
 * of the same row, which is how "EN and AN are handled like L" is encoded.
 * Rows 2 and 3 of the first table and rows 2, 3 and 5 of the second are
 * never the target of any cell in their own table; they appear to be kept
 * so the row numbering matches the DEFAULT tables (NOTE(review): confirm).
 */
1019 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1020 /* This table is identical to the Default LTR table except that EN and AN are
1021 handled like L.
1022 */
1023 {
1024 /* L , R , EN , AN , ON , S , B , Res */
1025 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1026 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1027 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1028 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1029 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1030 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
1031 };
1032 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1033 /* This table is identical to the Default RTL table except that EN and AN are
1034 handled like L.
1035 */
1036 {
1037 /* L , R , EN , AN , ON , S , B , Res */
1038 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1039 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
1040 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1041 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1042 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
1043 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1044 };
/* [0] = even run levels, [1] = odd run levels; identity action map for both. */
1045 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1046 {&impTabL_INVERSE_NUMBERS_AS_L,
1047 &impTabR_INVERSE_NUMBERS_AS_L},
1048 {&impAct0, &impAct0}};
1049
/*
 * Levels state table of the INVERSE_LIKE_DIRECT pair for odd run levels.
 * Its table-local actions are translated through impAct1 (below), so
 * s(2,x) triggers action 11 and s(3,x) triggers action 12 of
 * processPropertySeq().  Note the Res values 2 and 3 in rows 4 and 5: this
 * table can raise a sequence by more than one level above the run level.
 */
1050 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
1051 /* In this table, conditional sequences receive the lower possible level
1052 until proven otherwise.
1053 */
1054 {
1055 /* L , R , EN , AN , ON , S , B , Res */
1056 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1057 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
1058 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1059 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1060 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1061 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1062 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
1063 };
/* Action map for the table above: local 1 -> action 1, local 2 -> action 11,
 * local 3 -> action 12 (case labels in processPropertySeq()).
 */
1064 static const ImpAct impAct1 = {0,1,11,12};
1065 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1066 */
/* Even run levels reuse impTabL_DEFAULT with the identity map impAct0; odd
 * run levels use the table above with impAct1.
 */
1067 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1068 {&impTabL_DEFAULT,
1069 &impTabR_INVERSE_LIKE_DIRECT},
1070 {&impAct0, &impAct1}};
1071
/*
 * Levels state tables of the INVERSE_LIKE_DIRECT_WITH_MARKS pair.
 * The first table (even run levels) is paired with the identity map impAct0
 * and uses actions 1..6 of processPropertySeq() directly.  The second table
 * (odd run levels) is paired with impAct2, so its local actions 2..5 are
 * actions 7..10 of processPropertySeq().  Several of those actions call
 * addPoint() to record positions where LRM/RLM marks are to be inserted.
 */
1072 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1073 /* The case handled in this table is (visually): R EN L
1074 */
1075 {
1076 /* L , R , EN , AN , ON , S , B , Res */
1077 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1078 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1079 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1080 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1081 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1082 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1083 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
1084 };
1085 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1086 /* The cases handled in this table are (visually): R EN L
1087 R L AN L
1088 */
1089 {
1090 /* L , R , EN , AN , ON , S , B , Res */
1091 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1092 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1093 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1094 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1095 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1096 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1097 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
1098 };
/* Action map for the odd-level table above: local 1 -> 1, 2 -> 7, 3 -> 8,
 * 4 -> 9, 5 -> 10 (case labels in processPropertySeq()).
 */
1099 static const ImpAct impAct2 = {0,1,7,8,9,10};
/* [0] = even run levels (impAct0), [1] = odd run levels (impAct2). */
1100 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1101 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1102 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1103 {&impAct0, &impAct2}};
1104
/* This pair defines no table of its own: even run levels reuse
 * impTabL_NUMBERS_SPECIAL with the identity map impAct0, odd run levels
 * reuse impTabR_INVERSE_LIKE_DIRECT with its action map impAct1.
 */
1105 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1106 {&impTabL_NUMBERS_SPECIAL,
1107 &impTabR_INVERSE_LIKE_DIRECT},
1108 {&impAct0, &impAct1}};
1109
/*
 * Levels state table of the INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS pair for
 * even run levels.  It is paired with the identity map impAct0, so its
 * actions 1..6 are the same-numbered cases of processPropertySeq().
 * EN and AN share one state here (rows 1 and 4), unlike the 7-row
 * impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS table which keeps them apart.
 */
1110 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1111 /* The case handled in this table is (visually): R EN L
1112 */
1113 {
1114 /* L , R , EN , AN , ON , S , B , Res */
1115 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1116 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1117 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1118 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1119 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
1120 };
/* Odd run levels reuse impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS together with
 * its action map impAct2.
 */
1121 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1122 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1123 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1124 {&impAct0, &impAct2}};
1125
1126 #undef s
1127
/*
 * Working state of the levels state machine for one level run.
 * resolveImplicitLevels() creates one instance per call and passes it to
 * every processPropertySeq() call for that run.
 *
 * startON, startL2EN and lastStrongRTL are text indexes; processPropertySeq()
 * also stores the sentinels -1 (and, for startL2EN, -2) in them.
 * NOTE(review): resolveImplicitLevels() as shown initializes startL2EN,
 * lastStrongRTL, state, runLevel, pImpTab and pImpAct, but not startON.
 */
1128 typedef struct {
1129 const ImpTab * pImpTab; /* level table pointer */
1130 const ImpAct * pImpAct; /* action map array */
1131 int32_t startON; /* start of ON sequence */
1132 int32_t startL2EN; /* start of level 2 sequence */
1133 int32_t lastStrongRTL; /* index of last found R or AL */
1134 int32_t state; /* current state */
1135 UBiDiLevel runLevel; /* run level before implicit solving */
1136 } LevState;
1137
1138 /*------------------------------------------------------------------------*/
1139
1140 static void
addPoint(UBiDi * pBiDi,int32_t pos,int32_t flag)1141 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1142 /* param pos: position where to insert
1143 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1144 */
1145 {
1146 #define FIRSTALLOC 10
1147 Point point;
1148 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1149
1150 if (pInsertPoints->capacity == 0)
1151 {
1152 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1153 if (pInsertPoints->points == NULL)
1154 {
1155 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1156 return;
1157 }
1158 pInsertPoints->capacity=FIRSTALLOC;
1159 }
1160 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1161 {
1162 void * savePoints=pInsertPoints->points;
1163 pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1164 pInsertPoints->capacity*2*sizeof(Point));
1165 if (pInsertPoints->points == NULL)
1166 {
1167 pInsertPoints->points=savePoints;
1168 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1169 return;
1170 }
1171 else pInsertPoints->capacity*=2;
1172 }
1173 point.pos=pos;
1174 point.flag=flag;
1175 pInsertPoints->points[pInsertPoints->size]=point;
1176 pInsertPoints->size++;
1177 #undef FIRSTALLOC
1178 }
1179
1180 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1181
1182 /*
1183 * This implementation of the (Wn) rules applies all rules in one pass.
1184 * In order to do so, it needs a look-ahead of typically 1 character
1185 * (except for W5: sequences of ET) and keeps track of changes
1186 * in a rule Wp that affect a later Wq (p<q).
1187 *
1188 * The (Nn) and (In) rules are also performed in that same single loop,
1189 * but effectively one iteration behind for white space.
1190 *
1191 * Since all implicit rules are performed in one step, it is not necessary
1192 * to actually store the intermediate directional properties in dirProps[].
1193 */
1194
1195 static void
processPropertySeq(UBiDi * pBiDi,LevState * pLevState,uint8_t _prop,int32_t start,int32_t limit)1196 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1197 int32_t start, int32_t limit) {
1198 uint8_t cell, oldStateSeq, actionSeq;
1199 const ImpTab * pImpTab=pLevState->pImpTab;
1200 const ImpAct * pImpAct=pLevState->pImpAct;
1201 UBiDiLevel * levels=pBiDi->levels;
1202 UBiDiLevel level, addLevel;
1203 InsertPoints * pInsertPoints;
1204 int32_t start0, k;
1205
1206 start0=start; /* save original start position */
1207 oldStateSeq=(uint8_t)pLevState->state;
1208 cell=(*pImpTab)[oldStateSeq][_prop];
1209 pLevState->state=GET_STATE(cell); /* isolate the new state */
1210 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1211 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1212
1213 if(actionSeq) {
1214 switch(actionSeq) {
1215 case 1: /* init ON seq */
1216 pLevState->startON=start0;
1217 break;
1218
1219 case 2: /* prepend ON seq to current seq */
1220 start=pLevState->startON;
1221 break;
1222
1223 case 3: /* L or S after possible relevant EN/AN */
1224 /* check if we had EN after R/AL */
1225 if (pLevState->startL2EN >= 0) {
1226 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1227 }
1228 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
1229 /* check if we had any relevant EN/AN after R/AL */
1230 pInsertPoints=&(pBiDi->insertPoints);
1231 if ((pInsertPoints->capacity == 0) ||
1232 (pInsertPoints->size <= pInsertPoints->confirmed))
1233 {
1234 /* nothing, just clean up */
1235 pLevState->lastStrongRTL=-1;
1236 /* check if we have a pending conditional segment */
1237 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1238 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
1239 start=pLevState->startON; /* reset to basic run level */
1240 }
1241 if (_prop == _S) /* add LRM before S */
1242 {
1243 addPoint(pBiDi, start0, LRM_BEFORE);
1244 pInsertPoints->confirmed=pInsertPoints->size;
1245 }
1246 break;
1247 }
1248 /* reset previous RTL cont to level for LTR text */
1249 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1250 {
1251 /* reset odd level, leave runLevel+2 as is */
1252 levels[k]=(levels[k] - 2) & ~1;
1253 }
1254 /* mark insert points as confirmed */
1255 pInsertPoints->confirmed=pInsertPoints->size;
1256 pLevState->lastStrongRTL=-1;
1257 if (_prop == _S) /* add LRM before S */
1258 {
1259 addPoint(pBiDi, start0, LRM_BEFORE);
1260 pInsertPoints->confirmed=pInsertPoints->size;
1261 }
1262 break;
1263
1264 case 4: /* R/AL after possible relevant EN/AN */
1265 /* just clean up */
1266 pInsertPoints=&(pBiDi->insertPoints);
1267 if (pInsertPoints->capacity > 0)
1268 /* remove all non confirmed insert points */
1269 pInsertPoints->size=pInsertPoints->confirmed;
1270 pLevState->startON=-1;
1271 pLevState->startL2EN=-1;
1272 pLevState->lastStrongRTL=limit - 1;
1273 break;
1274
1275 case 5: /* EN/AN after R/AL + possible cont */
1276 /* check for real AN */
1277 if ((_prop == _AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
1278 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1279 {
1280 /* real AN */
1281 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1282 {
1283 /* just note the righmost digit as a strong RTL */
1284 pLevState->lastStrongRTL=limit - 1;
1285 break;
1286 }
1287 if (pLevState->startL2EN >= 0) /* after EN, no AN */
1288 {
1289 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1290 pLevState->startL2EN=-2;
1291 }
1292 /* note AN */
1293 addPoint(pBiDi, start0, LRM_BEFORE);
1294 break;
1295 }
1296 /* if first EN/AN after R/AL */
1297 if (pLevState->startL2EN == -1) {
1298 pLevState->startL2EN=start0;
1299 }
1300 break;
1301
1302 case 6: /* note location of latest R/AL */
1303 pLevState->lastStrongRTL=limit - 1;
1304 pLevState->startON=-1;
1305 break;
1306
1307 case 7: /* L after R+ON/EN/AN */
1308 /* include possible adjacent number on the left */
1309 for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1310 if(k>=0) {
1311 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
1312 pInsertPoints=&(pBiDi->insertPoints);
1313 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
1314 }
1315 pLevState->startON=start0;
1316 break;
1317
1318 case 8: /* AN after L */
1319 /* AN numbers between L text on both sides may be trouble. */
1320 /* tentatively bracket with LRMs; will be confirmed if followed by L */
1321 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
1322 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
1323 break;
1324
1325 case 9: /* R after L+ON/EN/AN */
1326 /* false alert, infirm LRMs around previous AN */
1327 pInsertPoints=&(pBiDi->insertPoints);
1328 pInsertPoints->size=pInsertPoints->confirmed;
1329 if (_prop == _S) /* add RLM before S */
1330 {
1331 addPoint(pBiDi, start0, RLM_BEFORE);
1332 pInsertPoints->confirmed=pInsertPoints->size;
1333 }
1334 break;
1335
1336 case 10: /* L after L+ON/AN */
1337 level=pLevState->runLevel + addLevel;
1338 for(k=pLevState->startON; k<start0; k++) {
1339 if (levels[k]<level)
1340 levels[k]=level;
1341 }
1342 pInsertPoints=&(pBiDi->insertPoints);
1343 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
1344 pLevState->startON=start0;
1345 break;
1346
1347 case 11: /* L after L+ON+EN/AN/ON */
1348 level=pLevState->runLevel;
1349 for(k=start0-1; k>=pLevState->startON; k--) {
1350 if(levels[k]==level+3) {
1351 while(levels[k]==level+3) {
1352 levels[k--]-=2;
1353 }
1354 while(levels[k]==level) {
1355 k--;
1356 }
1357 }
1358 if(levels[k]==level+2) {
1359 levels[k]=level;
1360 continue;
1361 }
1362 levels[k]=level+1;
1363 }
1364 break;
1365
1366 case 12: /* R after L+ON+EN/AN/ON */
1367 level=pLevState->runLevel+1;
1368 for(k=start0-1; k>=pLevState->startON; k--) {
1369 if(levels[k]>level) {
1370 levels[k]-=2;
1371 }
1372 }
1373 break;
1374
1375 default: /* we should never get here */
1376 U_ASSERT(FALSE);
1377 break;
1378 }
1379 }
1380 if((addLevel) || (start < start0)) {
1381 level=pLevState->runLevel + addLevel;
1382 for(k=start; k<limit; k++) {
1383 levels[k]=level;
1384 }
1385 }
1386 }
1387
1388 static void
resolveImplicitLevels(UBiDi * pBiDi,int32_t start,int32_t limit,DirProp sor,DirProp eor)1389 resolveImplicitLevels(UBiDi *pBiDi,
1390 int32_t start, int32_t limit,
1391 DirProp sor, DirProp eor) {
1392 const DirProp *dirProps=pBiDi->dirProps;
1393
1394 LevState levState;
1395 int32_t i, start1, start2;
1396 uint8_t oldStateImp, stateImp, actionImp;
1397 uint8_t gprop, resProp, cell;
1398 UBool inverseRTL;
1399 DirProp nextStrongProp=R;
1400 int32_t nextStrongPos=-1;
1401
1402 /* check for RTL inverse BiDi mode */
1403 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
1404 * loop on the text characters from end to start.
1405 * This would need a different properties state table (at least different
1406 * actions) and different levels state tables (maybe very similar to the
1407 * LTR corresponding ones.
1408 */
1409 inverseRTL=(UBool)
1410 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
1411 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
1412 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
1413 /* initialize for levels state table */
1414 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1415 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1416 levState.state=0;
1417 levState.runLevel=pBiDi->levels[start];
1418 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
1419 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
1420 processPropertySeq(pBiDi, &levState, sor, start, start);
1421 /* initialize for property state table */
1422 if(dirProps[start]==NSM) {
1423 stateImp = 1 + sor;
1424 } else {
1425 stateImp=0;
1426 }
1427 start1=start;
1428 start2=start;
1429
1430 for(i=start; i<=limit; i++) {
1431 if(i>=limit) {
1432 gprop=eor;
1433 } else {
1434 DirProp prop, prop1;
1435 prop=NO_CONTEXT_RTL(dirProps[i]);
1436 if(inverseRTL) {
1437 if(prop==AL) {
1438 /* AL before EN does not make it AN */
1439 prop=R;
1440 } else if(prop==EN) {
1441 if(nextStrongPos<=i) {
1442 /* look for next strong char (L/R/AL) */
1443 int32_t j;
1444 nextStrongProp=R; /* set default */
1445 nextStrongPos=limit;
1446 for(j=i+1; j<limit; j++) {
1447 prop1=NO_CONTEXT_RTL(dirProps[j]);
1448 if(prop1==L || prop1==R || prop1==AL) {
1449 nextStrongProp=prop1;
1450 nextStrongPos=j;
1451 break;
1452 }
1453 }
1454 }
1455 if(nextStrongProp==AL) {
1456 prop=AN;
1457 }
1458 }
1459 }
1460 gprop=groupProp[prop];
1461 }
1462 oldStateImp=stateImp;
1463 cell=impTabProps[oldStateImp][gprop];
1464 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
1465 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
1466 if((i==limit) && (actionImp==0)) {
1467 /* there is an unprocessed sequence if its property == eor */
1468 actionImp=1; /* process the last sequence */
1469 }
1470 if(actionImp) {
1471 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
1472 switch(actionImp) {
1473 case 1: /* process current seq1, init new seq1 */
1474 processPropertySeq(pBiDi, &levState, resProp, start1, i);
1475 start1=i;
1476 break;
1477 case 2: /* init new seq2 */
1478 start2=i;
1479 break;
1480 case 3: /* process seq1, process seq2, init new seq1 */
1481 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1482 processPropertySeq(pBiDi, &levState, _ON, start2, i);
1483 start1=i;
1484 break;
1485 case 4: /* process seq1, set seq1=seq2, init new seq2 */
1486 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1487 start1=start2;
1488 start2=i;
1489 break;
1490 default: /* we should never get here */
1491 U_ASSERT(FALSE);
1492 break;
1493 }
1494 }
1495 }
1496 /* flush possible pending sequence, e.g. ON */
1497 processPropertySeq(pBiDi, &levState, eor, limit, limit);
1498 }
1499
1500 /* perform (L1) and (X9) ---------------------------------------------------- */
1501
1502 /*
1503 * Reset the embedding levels for some non-graphic characters (L1).
1504 * This function also sets appropriate levels for BN, and
1505 * explicit embedding types that are supposed to have been removed
1506 * from the paragraph in (X9).
1507 */
1508 static void
adjustWSLevels(UBiDi * pBiDi)1509 adjustWSLevels(UBiDi *pBiDi) {
1510 const DirProp *dirProps=pBiDi->dirProps;
1511 UBiDiLevel *levels=pBiDi->levels;
1512 int32_t i;
1513
1514 if(pBiDi->flags&MASK_WS) {
1515 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
1516 Flags flag;
1517
1518 i=pBiDi->trailingWSStart;
1519 while(i>0) {
1520 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1521 while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) {
1522 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1523 levels[i]=0;
1524 } else {
1525 levels[i]=GET_PARALEVEL(pBiDi, i);
1526 }
1527 }
1528
1529 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1530 /* here, i+1 is guaranteed to be <length */
1531 while(i>0) {
1532 flag=DIRPROP_FLAG_NC(dirProps[--i]);
1533 if(flag&MASK_BN_EXPLICIT) {
1534 levels[i]=levels[i+1];
1535 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1536 levels[i]=0;
1537 break;
1538 } else if(flag&MASK_B_S) {
1539 levels[i]=GET_PARALEVEL(pBiDi, i);
1540 break;
1541 }
1542 }
1543 }
1544 }
1545 }
1546
1547 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y))
1548 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x)))
1549 static void
setParaRunsOnly(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UErrorCode * pErrorCode)1550 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
1551 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
1552 void *runsOnlyMemory;
1553 int32_t *visualMap;
1554 UChar *visualText;
1555 int32_t saveLength, saveTrailingWSStart;
1556 const UBiDiLevel *levels;
1557 UBiDiLevel *saveLevels;
1558 UBiDiDirection saveDirection;
1559 UBool saveMayAllocateText;
1560 Run *runs;
1561 int32_t visualLength, i, j, visualStart, logicalStart,
1562 runCount, runLength, addedRuns, insertRemove,
1563 start, limit, step, indexOddBit, logicalPos,
1564 index, index1;
1565 uint32_t saveOptions;
1566
1567 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
1568 if(length==0) {
1569 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1570 goto cleanup3;
1571 }
1572 /* obtain memory for mapping table and visual text */
1573 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
1574 if(runsOnlyMemory==NULL) {
1575 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1576 goto cleanup3;
1577 }
1578 visualMap=runsOnlyMemory;
1579 visualText=(UChar *)&visualMap[length];
1580 saveLevels=(UBiDiLevel *)&visualText[length];
1581 saveOptions=pBiDi->reorderingOptions;
1582 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
1583 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
1584 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
1585 }
1586 paraLevel&=1; /* accept only 0 or 1 */
1587 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1588 if(U_FAILURE(*pErrorCode)) {
1589 goto cleanup3;
1590 }
1591 /* we cannot access directly pBiDi->levels since it is not yet set if
1592 * direction is not MIXED
1593 */
1594 levels=ubidi_getLevels(pBiDi, pErrorCode);
1595 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
1596 saveTrailingWSStart=pBiDi->trailingWSStart;
1597 saveLength=pBiDi->length;
1598 saveDirection=pBiDi->direction;
1599
1600 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
1601 * the visual map and the dirProps array to drive the second call
1602 * to ubidi_setPara (but must make provision for possible removal of
1603 * BiDi controls. Alternatively, only use the dirProps array via
1604 * customized classifier callback.
1605 */
1606 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
1607 UBIDI_DO_MIRRORING, pErrorCode);
1608 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
1609 if(U_FAILURE(*pErrorCode)) {
1610 goto cleanup2;
1611 }
1612 pBiDi->reorderingOptions=saveOptions;
1613
1614 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
1615 paraLevel^=1;
1616 /* Because what we did with reorderingOptions, visualText may be shorter
1617 * than the original text. But we don't want the levels memory to be
1618 * reallocated shorter than the original length, since we need to restore
1619 * the levels as after the first call to ubidi_setpara() before returning.
1620 * We will force mayAllocateText to FALSE before the second call to
1621 * ubidi_setpara(), and will restore it afterwards.
1622 */
1623 saveMayAllocateText=pBiDi->mayAllocateText;
1624 pBiDi->mayAllocateText=FALSE;
1625 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
1626 pBiDi->mayAllocateText=saveMayAllocateText;
1627 ubidi_getRuns(pBiDi, pErrorCode);
1628 if(U_FAILURE(*pErrorCode)) {
1629 goto cleanup1;
1630 }
1631 /* check if some runs must be split, count how many splits */
1632 addedRuns=0;
1633 runCount=pBiDi->runCount;
1634 runs=pBiDi->runs;
1635 visualStart=0;
1636 for(i=0; i<runCount; i++, visualStart+=runLength) {
1637 runLength=runs[i].visualLimit-visualStart;
1638 if(runLength<2) {
1639 continue;
1640 }
1641 logicalStart=GET_INDEX(runs[i].logicalStart);
1642 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
1643 index=visualMap[j];
1644 index1=visualMap[j-1];
1645 if((BIDI_ABS(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
1646 addedRuns++;
1647 }
1648 }
1649 }
1650 if(addedRuns) {
1651 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
1652 if(runCount==1) {
1653 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
1654 pBiDi->runsMemory[0]=runs[0];
1655 }
1656 runs=pBiDi->runs=pBiDi->runsMemory;
1657 pBiDi->runCount+=addedRuns;
1658 } else {
1659 goto cleanup1;
1660 }
1661 }
1662 /* split runs which are not consecutive in source text */
1663 for(i=runCount-1; i>=0; i--) {
1664 runLength= i==0 ? runs[0].visualLimit :
1665 runs[i].visualLimit-runs[i-1].visualLimit;
1666 logicalStart=runs[i].logicalStart;
1667 indexOddBit=GET_ODD_BIT(logicalStart);
1668 logicalStart=GET_INDEX(logicalStart);
1669 if(runLength<2) {
1670 if(addedRuns) {
1671 runs[i+addedRuns]=runs[i];
1672 }
1673 logicalPos=visualMap[logicalStart];
1674 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1675 saveLevels[logicalPos]^indexOddBit);
1676 continue;
1677 }
1678 if(indexOddBit) {
1679 start=logicalStart;
1680 limit=logicalStart+runLength-1;
1681 step=1;
1682 } else {
1683 start=logicalStart+runLength-1;
1684 limit=logicalStart;
1685 step=-1;
1686 }
1687 for(j=start; j!=limit; j+=step) {
1688 index=visualMap[j];
1689 index1=visualMap[j+step];
1690 if((BIDI_ABS(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
1691 logicalPos=BIDI_MIN(visualMap[start], index);
1692 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1693 saveLevels[logicalPos]^indexOddBit);
1694 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
1695 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
1696 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
1697 runs[i+addedRuns].insertRemove=insertRemove;
1698 runs[i].insertRemove&=~insertRemove;
1699 start=j+step;
1700 addedRuns--;
1701 }
1702 }
1703 if(addedRuns) {
1704 runs[i+addedRuns]=runs[i];
1705 }
1706 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
1707 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1708 saveLevels[logicalPos]^indexOddBit);
1709 }
1710
1711 cleanup1:
1712 /* restore initial paraLevel */
1713 pBiDi->paraLevel^=1;
1714 cleanup2:
1715 /* restore real text */
1716 pBiDi->text=text;
1717 pBiDi->length=saveLength;
1718 pBiDi->originalLength=length;
1719 pBiDi->direction=saveDirection;
1720 /* the saved levels should never excess levelsSize, but we check anyway */
1721 if(saveLength>pBiDi->levelsSize) {
1722 saveLength=pBiDi->levelsSize;
1723 }
1724 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
1725 pBiDi->trailingWSStart=saveTrailingWSStart;
1726 /* free memory for mapping table and visual text */
1727 uprv_free(runsOnlyMemory);
1728 if(pBiDi->runCount>1) {
1729 pBiDi->direction=UBIDI_MIXED;
1730 }
1731 cleanup3:
1732 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
1733 }
1734
1735 /* ubidi_setPara ------------------------------------------------------------ */
1736
1737 U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * pErrorCode)1738 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1739 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1740 UErrorCode *pErrorCode) {
1741 UBiDiDirection direction;
1742
1743 /* check the argument values */
1744 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1745 if(pBiDi==NULL || text==NULL || length<-1 ||
1746 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
1747 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1748 return;
1749 }
1750
1751 if(length==-1) {
1752 length=u_strlen(text);
1753 }
1754
1755 /* special treatment for RUNS_ONLY mode */
1756 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
1757 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
1758 return;
1759 }
1760
1761 /* initialize the UBiDi structure */
1762 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
1763 pBiDi->text=text;
1764 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
1765 pBiDi->paraLevel=paraLevel;
1766 pBiDi->direction=UBIDI_LTR;
1767 pBiDi->paraCount=1;
1768
1769 pBiDi->dirProps=NULL;
1770 pBiDi->levels=NULL;
1771 pBiDi->runs=NULL;
1772 pBiDi->insertPoints.size=0; /* clean up from last call */
1773 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
1774
1775 /*
1776 * Save the original paraLevel if contextual; otherwise, set to 0.
1777 */
1778 if(IS_DEFAULT_LEVEL(paraLevel)) {
1779 pBiDi->defaultParaLevel=paraLevel;
1780 } else {
1781 pBiDi->defaultParaLevel=0;
1782 }
1783
1784 if(length==0) {
1785 /*
1786 * For an empty paragraph, create a UBiDi object with the paraLevel and
1787 * the flags and the direction set but without allocating zero-length arrays.
1788 * There is nothing more to do.
1789 */
1790 if(IS_DEFAULT_LEVEL(paraLevel)) {
1791 pBiDi->paraLevel&=1;
1792 pBiDi->defaultParaLevel=0;
1793 }
1794 if(paraLevel&1) {
1795 pBiDi->flags=DIRPROP_FLAG(R);
1796 pBiDi->direction=UBIDI_RTL;
1797 } else {
1798 pBiDi->flags=DIRPROP_FLAG(L);
1799 pBiDi->direction=UBIDI_LTR;
1800 }
1801
1802 pBiDi->runCount=0;
1803 pBiDi->paraCount=0;
1804 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
1805 return;
1806 }
1807
1808 pBiDi->runCount=-1;
1809
1810 /*
1811 * Get the directional properties,
1812 * the flags bit-set, and
1813 * determine the paragraph level if necessary.
1814 */
1815 if(getDirPropsMemory(pBiDi, length)) {
1816 pBiDi->dirProps=pBiDi->dirPropsMemory;
1817 getDirProps(pBiDi);
1818 } else {
1819 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1820 return;
1821 }
1822 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
1823 length= pBiDi->length;
1824 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
1825 /* allocate paras memory */
1826 if(pBiDi->paraCount>1) {
1827 if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) {
1828 pBiDi->paras=pBiDi->parasMemory;
1829 pBiDi->paras[pBiDi->paraCount-1]=length;
1830 } else {
1831 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1832 return;
1833 }
1834 } else {
1835 /* initialize paras for single paragraph */
1836 pBiDi->paras=pBiDi->simpleParas;
1837 pBiDi->simpleParas[0]=length;
1838 }
1839
1840 /* are explicit levels specified? */
1841 if(embeddingLevels==NULL) {
1842 /* no: determine explicit levels according to the (Xn) rules */\
1843 if(getLevelsMemory(pBiDi, length)) {
1844 pBiDi->levels=pBiDi->levelsMemory;
1845 direction=resolveExplicitLevels(pBiDi);
1846 } else {
1847 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1848 return;
1849 }
1850 } else {
1851 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
1852 pBiDi->levels=embeddingLevels;
1853 direction=checkExplicitLevels(pBiDi, pErrorCode);
1854 if(U_FAILURE(*pErrorCode)) {
1855 return;
1856 }
1857 }
1858
1859 /*
1860 * The steps after (X9) in the UBiDi algorithm are performed only if
1861 * the paragraph text has mixed directionality!
1862 */
1863 pBiDi->direction=direction;
1864 switch(direction) {
1865 case UBIDI_LTR:
1866 /* make sure paraLevel is even */
1867 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
1868
1869 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1870 pBiDi->trailingWSStart=0;
1871 break;
1872 case UBIDI_RTL:
1873 /* make sure paraLevel is odd */
1874 pBiDi->paraLevel|=1;
1875
1876 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1877 pBiDi->trailingWSStart=0;
1878 break;
1879 default:
1880 /*
1881 * Choose the right implicit state table
1882 */
1883 switch(pBiDi->reorderingMode) {
1884 case UBIDI_REORDER_DEFAULT:
1885 pBiDi->pImpTabPair=&impTab_DEFAULT;
1886 break;
1887 case UBIDI_REORDER_NUMBERS_SPECIAL:
1888 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
1889 break;
1890 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
1891 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
1892 break;
1893 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
1894 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
1895 break;
1896 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
1897 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1898 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
1899 } else {
1900 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
1901 }
1902 break;
1903 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
1904 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1905 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
1906 } else {
1907 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
1908 }
1909 break;
1910 default:
1911 /* we should never get here */
1912 U_ASSERT(FALSE);
1913 break;
1914 }
1915 /*
1916 * If there are no external levels specified and there
1917 * are no significant explicit level codes in the text,
1918 * then we can treat the entire paragraph as one run.
1919 * Otherwise, we need to perform the following rules on runs of
1920 * the text with the same embedding levels. (X10)
1921 * "Significant" explicit level codes are ones that actually
1922 * affect non-BN characters.
1923 * Examples for "insignificant" ones are empty embeddings
1924 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
1925 */
1926 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
1927 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
1928 resolveImplicitLevels(pBiDi, 0, length,
1929 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
1930 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
1931 } else {
1932 /* sor, eor: start and end types of same-level-run */
1933 UBiDiLevel *levels=pBiDi->levels;
1934 int32_t start, limit=0;
1935 UBiDiLevel level, nextLevel;
1936 DirProp sor, eor;
1937
1938 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
1939 level=GET_PARALEVEL(pBiDi, 0);
1940 nextLevel=levels[0];
1941 if(level<nextLevel) {
1942 eor=GET_LR_FROM_LEVEL(nextLevel);
1943 } else {
1944 eor=GET_LR_FROM_LEVEL(level);
1945 }
1946
1947 do {
1948 /* determine start and limit of the run (end points just behind the run) */
1949
1950 /* the values for this run's start are the same as for the previous run's end */
1951 start=limit;
1952 level=nextLevel;
1953 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) {
1954 /* except if this is a new paragraph, then set sor = para level */
1955 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
1956 } else {
1957 sor=eor;
1958 }
1959
1960 /* search for the limit of this run */
1961 while(++limit<length && levels[limit]==level) {}
1962
1963 /* get the correct level of the next run */
1964 if(limit<length) {
1965 nextLevel=levels[limit];
1966 } else {
1967 nextLevel=GET_PARALEVEL(pBiDi, length-1);
1968 }
1969
1970 /* determine eor from max(level, nextLevel); sor is last run's eor */
1971 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
1972 eor=GET_LR_FROM_LEVEL(nextLevel);
1973 } else {
1974 eor=GET_LR_FROM_LEVEL(level);
1975 }
1976
1977 /* if the run consists of overridden directional types, then there
1978 are no implicit types to be resolved */
1979 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
1980 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
1981 } else {
1982 /* remove the UBIDI_LEVEL_OVERRIDE flags */
1983 do {
1984 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
1985 } while(start<limit);
1986 }
1987 } while(limit<length);
1988 }
1989 /* check if we got any memory shortage while adding insert points */
1990 if (U_FAILURE(pBiDi->insertPoints.errorCode))
1991 {
1992 *pErrorCode=pBiDi->insertPoints.errorCode;
1993 return;
1994 }
1995 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
1996 adjustWSLevels(pBiDi);
1997 break;
1998 }
1999 /* add RLM for inverse Bidi with contextual orientation resolving
2000 * to RTL which would not round-trip otherwise
2001 */
2002 if((pBiDi->defaultParaLevel>0) &&
2003 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2004 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2005 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2006 int32_t i, j, start, last;
2007 DirProp dirProp;
2008 for(i=0; i<pBiDi->paraCount; i++) {
2009 last=pBiDi->paras[i]-1;
2010 if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) {
2011 continue; /* LTR paragraph */
2012 }
2013 start= i==0 ? 0 : pBiDi->paras[i - 1];
2014 for(j=last; j>=start; j--) {
2015 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]);
2016 if(dirProp==L) {
2017 if(j<last) {
2018 while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) {
2019 last--;
2020 }
2021 }
2022 addPoint(pBiDi, last, RLM_BEFORE);
2023 break;
2024 }
2025 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2026 break;
2027 }
2028 }
2029 }
2030 }
2031
2032 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2033 pBiDi->resultLength -= pBiDi->controlCount;
2034 } else {
2035 pBiDi->resultLength += pBiDi->insertPoints.size;
2036 }
2037 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2038 }
2039
2040 U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi * pBiDi,UBool orderParagraphsLTR)2041 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2042 if(pBiDi!=NULL) {
2043 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2044 }
2045 }
2046
2047 U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi * pBiDi)2048 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2049 if(pBiDi!=NULL) {
2050 return pBiDi->orderParagraphsLTR;
2051 } else {
2052 return FALSE;
2053 }
2054 }
2055
2056 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi * pBiDi)2057 ubidi_getDirection(const UBiDi *pBiDi) {
2058 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2059 return pBiDi->direction;
2060 } else {
2061 return UBIDI_LTR;
2062 }
2063 }
2064
2065 U_CAPI const UChar * U_EXPORT2
ubidi_getText(const UBiDi * pBiDi)2066 ubidi_getText(const UBiDi *pBiDi) {
2067 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2068 return pBiDi->text;
2069 } else {
2070 return NULL;
2071 }
2072 }
2073
2074 U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi * pBiDi)2075 ubidi_getLength(const UBiDi *pBiDi) {
2076 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2077 return pBiDi->originalLength;
2078 } else {
2079 return 0;
2080 }
2081 }
2082
2083 U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi * pBiDi)2084 ubidi_getProcessedLength(const UBiDi *pBiDi) {
2085 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2086 return pBiDi->length;
2087 } else {
2088 return 0;
2089 }
2090 }
2091
2092 U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi * pBiDi)2093 ubidi_getResultLength(const UBiDi *pBiDi) {
2094 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2095 return pBiDi->resultLength;
2096 } else {
2097 return 0;
2098 }
2099 }
2100
2101 /* paragraphs API functions ------------------------------------------------- */
2102
2103 U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi * pBiDi)2104 ubidi_getParaLevel(const UBiDi *pBiDi) {
2105 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2106 return pBiDi->paraLevel;
2107 } else {
2108 return 0;
2109 }
2110 }
2111
2112 U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi * pBiDi)2113 ubidi_countParagraphs(UBiDi *pBiDi) {
2114 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2115 return 0;
2116 } else {
2117 return pBiDi->paraCount;
2118 }
2119 }
2120
2121 U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi * pBiDi,int32_t paraIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2122 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2123 int32_t *pParaStart, int32_t *pParaLimit,
2124 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2125 int32_t paraStart;
2126
2127 /* check the argument values */
2128 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2129 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2130 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2131
2132 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2133 if(paraIndex) {
2134 paraStart=pBiDi->paras[paraIndex-1];
2135 } else {
2136 paraStart=0;
2137 }
2138 if(pParaStart!=NULL) {
2139 *pParaStart=paraStart;
2140 }
2141 if(pParaLimit!=NULL) {
2142 *pParaLimit=pBiDi->paras[paraIndex];
2143 }
2144 if(pParaLevel!=NULL) {
2145 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2146 }
2147 }
2148
2149 U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi * pBiDi,int32_t charIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2150 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2151 int32_t *pParaStart, int32_t *pParaLimit,
2152 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2153 uint32_t paraIndex;
2154
2155 /* check the argument values */
2156 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2157 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2158 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2159 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2160 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2161
2162 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++);
2163 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2164 return paraIndex;
2165 }
2166
2167 U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi * pBiDi,UBiDiClassCallback * newFn,const void * newContext,UBiDiClassCallback ** oldFn,const void ** oldContext,UErrorCode * pErrorCode)2168 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2169 const void *newContext, UBiDiClassCallback **oldFn,
2170 const void **oldContext, UErrorCode *pErrorCode)
2171 {
2172 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2173 if(pBiDi==NULL) {
2174 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2175 return;
2176 }
2177 if( oldFn )
2178 {
2179 *oldFn = pBiDi->fnClassCallback;
2180 }
2181 if( oldContext )
2182 {
2183 *oldContext = pBiDi->coClassCallback;
2184 }
2185 pBiDi->fnClassCallback = newFn;
2186 pBiDi->coClassCallback = newContext;
2187 }
2188
2189 U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi * pBiDi,UBiDiClassCallback ** fn,const void ** context)2190 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2191 {
2192 if(pBiDi==NULL) {
2193 return;
2194 }
2195 if( fn )
2196 {
2197 *fn = pBiDi->fnClassCallback;
2198 }
2199 if( context )
2200 {
2201 *context = pBiDi->coClassCallback;
2202 }
2203 }
2204
2205 U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi * pBiDi,UChar32 c)2206 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
2207 {
2208 UCharDirection dir;
2209
2210 if( pBiDi->fnClassCallback == NULL ||
2211 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
2212 {
2213 return ubidi_getClass(pBiDi->bdp, c);
2214 } else {
2215 return dir;
2216 }
2217 }
2218
2219