/*
******************************************************************************
*
*   Copyright (C) 2000-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidiwrt.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999aug06
*   created by: Markus W. Scherer, updated by Matitiahu Allouche
*
* This file contains implementations for BiDi functions that use
* the core algorithm and core API to write reordered text.
*/

20 /* set import/export definitions */
21 #ifndef U_COMMON_IMPLEMENTATION
22 #   define U_COMMON_IMPLEMENTATION
23 #endif
24 
25 #include "unicode/utypes.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/ubidi.h"
29 #include "unicode/utf16.h"
30 #include "cmemory.h"
31 #include "ustr_imp.h"
32 #include "ubidiimp.h"
33 
34 /*
35  * The function implementations in this file are designed
36  * for UTF-16 and UTF-32, not for UTF-8.
37  *
38  * Assumptions that are not true for UTF-8:
39  * - Any code point always needs the same number of code units
40  *   ("minimum-length-problem" of UTF-8)
41  * - The BiDi control characters need only one code unit each
42  *
43  * Further assumptions for all UTFs:
44  * - u_charMirror(c) needs the same number of code units as c
45  */
46 #if UTF_SIZE==8
47 # error reimplement ubidi_writeReordered() for UTF-8, see comment above
48 #endif
49 
50 #define IS_COMBINING(type) ((1UL<<(type))&(1UL<<U_NON_SPACING_MARK|1UL<<U_COMBINING_SPACING_MARK|1UL<<U_ENCLOSING_MARK))
51 
52 /*
53  * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
54  * semantically write RTL runs in reverse and later reverse them again.
55  * Instead, we actually write them in forward order to begin with.
56  * However, if the RTL run was to be mirrored, we need to mirror here now
57  * since the implicit second reversal must not do it.
58  * It looks strange to do mirroring in LTR output, but it is only because
59  * we are writing RTL output in reverse.
60  */
61 static int32_t
doWriteForward(const UChar * src,int32_t srcLength,UChar * dest,int32_t destSize,uint16_t options,UErrorCode * pErrorCode)62 doWriteForward(const UChar *src, int32_t srcLength,
63                UChar *dest, int32_t destSize,
64                uint16_t options,
65                UErrorCode *pErrorCode) {
66     /* optimize for several combinations of options */
67     switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) {
68     case 0: {
69         /* simply copy the LTR run to the destination */
70         int32_t length=srcLength;
71         if(destSize<length) {
72             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
73             return srcLength;
74         }
75         do {
76             *dest++=*src++;
77         } while(--length>0);
78         return srcLength;
79     }
80     case UBIDI_DO_MIRRORING: {
81         /* do mirroring */
82         int32_t i=0, j=0;
83         UChar32 c;
84 
85         if(destSize<srcLength) {
86             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
87             return srcLength;
88         }
89         do {
90             U16_NEXT(src, i, srcLength, c);
91             c=u_charMirror(c);
92             U16_APPEND_UNSAFE(dest, j, c);
93         } while(i<srcLength);
94         return srcLength;
95     }
96     case UBIDI_REMOVE_BIDI_CONTROLS: {
97         /* copy the LTR run and remove any BiDi control characters */
98         int32_t remaining=destSize;
99         UChar c;
100         do {
101             c=*src++;
102             if(!IS_BIDI_CONTROL_CHAR(c)) {
103                 if(--remaining<0) {
104                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
105 
106                     /* preflight the length */
107                     while(--srcLength>0) {
108                         c=*src++;
109                         if(!IS_BIDI_CONTROL_CHAR(c)) {
110                             --remaining;
111                         }
112                     }
113                     return destSize-remaining;
114                 }
115                 *dest++=c;
116             }
117         } while(--srcLength>0);
118         return destSize-remaining;
119     }
120     default: {
121         /* remove BiDi control characters and do mirroring */
122         int32_t remaining=destSize;
123         int32_t i, j=0;
124         UChar32 c;
125         do {
126             i=0;
127             U16_NEXT(src, i, srcLength, c);
128             src+=i;
129             srcLength-=i;
130             if(!IS_BIDI_CONTROL_CHAR(c)) {
131                 remaining-=i;
132                 if(remaining<0) {
133                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
134 
135                     /* preflight the length */
136                     while(srcLength>0) {
137                         c=*src++;
138                         if(!IS_BIDI_CONTROL_CHAR(c)) {
139                             --remaining;
140                         }
141                         --srcLength;
142                     }
143                     return destSize-remaining;
144                 }
145                 c=u_charMirror(c);
146                 U16_APPEND_UNSAFE(dest, j, c);
147             }
148         } while(srcLength>0);
149         return j;
150     }
151     } /* end of switch */
152 }
153 
154 static int32_t
doWriteReverse(const UChar * src,int32_t srcLength,UChar * dest,int32_t destSize,uint16_t options,UErrorCode * pErrorCode)155 doWriteReverse(const UChar *src, int32_t srcLength,
156                UChar *dest, int32_t destSize,
157                uint16_t options,
158                UErrorCode *pErrorCode) {
159     /*
160      * RTL run -
161      *
162      * RTL runs need to be copied to the destination in reverse order
163      * of code points, not code units, to keep Unicode characters intact.
164      *
165      * The general strategy for this is to read the source text
166      * in backward order, collect all code units for a code point
167      * (and optionally following combining characters, see below),
168      * and copy all these code units in ascending order
169      * to the destination for this run.
170      *
171      * Several options request whether combining characters
172      * should be kept after their base characters,
173      * whether BiDi control characters should be removed, and
174      * whether characters should be replaced by their mirror-image
175      * equivalent Unicode characters.
176      */
177     int32_t i, j;
178     UChar32 c;
179 
180     /* optimize for several combinations of options */
181     switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
182     case 0:
183         /*
184          * With none of the "complicated" options set, the destination
185          * run will have the same length as the source run,
186          * and there is no mirroring and no keeping combining characters
187          * with their base characters.
188          */
189         if(destSize<srcLength) {
190             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
191             return srcLength;
192         }
193         destSize=srcLength;
194 
195         /* preserve character integrity */
196         do {
197             /* i is always after the last code unit known to need to be kept in this segment */
198             i=srcLength;
199 
200             /* collect code units for one base character */
201             U16_BACK_1(src, 0, srcLength);
202 
203             /* copy this base character */
204             j=srcLength;
205             do {
206                 *dest++=src[j++];
207             } while(j<i);
208         } while(srcLength>0);
209         break;
210     case UBIDI_KEEP_BASE_COMBINING:
211         /*
212          * Here, too, the destination
213          * run will have the same length as the source run,
214          * and there is no mirroring.
215          * We do need to keep combining characters with their base characters.
216          */
217         if(destSize<srcLength) {
218             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
219             return srcLength;
220         }
221         destSize=srcLength;
222 
223         /* preserve character integrity */
224         do {
225             /* i is always after the last code unit known to need to be kept in this segment */
226             i=srcLength;
227 
228             /* collect code units and modifier letters for one base character */
229             do {
230                 U16_PREV(src, 0, srcLength, c);
231             } while(srcLength>0 && IS_COMBINING(u_charType(c)));
232 
233             /* copy this "user character" */
234             j=srcLength;
235             do {
236                 *dest++=src[j++];
237             } while(j<i);
238         } while(srcLength>0);
239         break;
240     default:
241         /*
242          * With several "complicated" options set, this is the most
243          * general and the slowest copying of an RTL run.
244          * We will do mirroring, remove BiDi controls, and
245          * keep combining characters with their base characters
246          * as requested.
247          */
248         if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) {
249             i=srcLength;
250         } else {
251             /* we need to find out the destination length of the run,
252                which will not include the BiDi control characters */
253             int32_t length=srcLength;
254             UChar ch;
255 
256             i=0;
257             do {
258                 ch=*src++;
259                 if(!IS_BIDI_CONTROL_CHAR(ch)) {
260                     ++i;
261                 }
262             } while(--length>0);
263             src-=srcLength;
264         }
265 
266         if(destSize<i) {
267             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
268             return i;
269         }
270         destSize=i;
271 
272         /* preserve character integrity */
273         do {
274             /* i is always after the last code unit known to need to be kept in this segment */
275             i=srcLength;
276 
277             /* collect code units for one base character */
278             U16_PREV(src, 0, srcLength, c);
279             if(options&UBIDI_KEEP_BASE_COMBINING) {
280                 /* collect modifier letters for this base character */
281                 while(srcLength>0 && IS_COMBINING(u_charType(c))) {
282                     U16_PREV(src, 0, srcLength, c);
283                 }
284             }
285 
286             if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) {
287                 /* do not copy this BiDi control character */
288                 continue;
289             }
290 
291             /* copy this "user character" */
292             j=srcLength;
293             if(options&UBIDI_DO_MIRRORING) {
294                 /* mirror only the base character */
295                 int32_t k=0;
296                 c=u_charMirror(c);
297                 U16_APPEND_UNSAFE(dest, k, c);
298                 dest+=k;
299                 j+=k;
300             }
301             while(j<i) {
302                 *dest++=src[j++];
303             }
304         } while(srcLength>0);
305         break;
306     } /* end of switch */
307 
308     return destSize;
309 }
310 
311 U_CAPI int32_t U_EXPORT2
ubidi_writeReverse(const UChar * src,int32_t srcLength,UChar * dest,int32_t destSize,uint16_t options,UErrorCode * pErrorCode)312 ubidi_writeReverse(const UChar *src, int32_t srcLength,
313                    UChar *dest, int32_t destSize,
314                    uint16_t options,
315                    UErrorCode *pErrorCode) {
316     int32_t destLength;
317 
318     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
319         return 0;
320     }
321 
322     /* more error checking */
323     if( src==NULL || srcLength<-1 ||
324         destSize<0 || (destSize>0 && dest==NULL))
325     {
326         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
327         return 0;
328     }
329 
330     /* do input and output overlap? */
331     if( dest!=NULL &&
332         ((src>=dest && src<dest+destSize) ||
333          (dest>=src && dest<src+srcLength)))
334     {
335         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
336         return 0;
337     }
338 
339     if(srcLength==-1) {
340         srcLength=u_strlen(src);
341     }
342     if(srcLength>0) {
343         destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode);
344     } else {
345         /* nothing to do */
346         destLength=0;
347     }
348 
349     return u_terminateUChars(dest, destSize, destLength, pErrorCode);
350 }
351 
352 U_CAPI int32_t U_EXPORT2
ubidi_writeReordered(UBiDi * pBiDi,UChar * dest,int32_t destSize,uint16_t options,UErrorCode * pErrorCode)353 ubidi_writeReordered(UBiDi *pBiDi,
354                      UChar *dest, int32_t destSize,
355                      uint16_t options,
356                      UErrorCode *pErrorCode) {
357     const UChar *text;
358     UChar *saveDest;
359     int32_t length, destCapacity;
360     int32_t run, runCount, logicalStart, runLength;
361 
362     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
363         return 0;
364     }
365 
366     /* more error checking */
367     if( pBiDi==NULL ||
368         (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 ||
369         destSize<0 || (destSize>0 && dest==NULL))
370     {
371         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
372         return 0;
373     }
374 
375     /* do input and output overlap? */
376     if( dest!=NULL &&
377         ((text>=dest && text<dest+destSize) ||
378          (dest>=text && dest<text+pBiDi->originalLength)))
379     {
380         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
381         return 0;
382     }
383 
384     if(length==0) {
385         /* nothing to do */
386         return u_terminateUChars(dest, destSize, 0, pErrorCode);
387     }
388 
389     runCount=ubidi_countRuns(pBiDi, pErrorCode);
390     if(U_FAILURE(*pErrorCode)) {
391         return 0;
392     }
393 
394     /* destSize shrinks, later destination length=destCapacity-destSize */
395     saveDest=dest;
396     destCapacity=destSize;
397 
398     /*
399      * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the
400      * reordering mode (checked below) is appropriate.
401      */
402     if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
403         options|=UBIDI_INSERT_LRM_FOR_NUMERIC;
404         options&=~UBIDI_REMOVE_BIDI_CONTROLS;
405     }
406     /*
407      * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS
408      * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC.
409      */
410     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
411         options|=UBIDI_REMOVE_BIDI_CONTROLS;
412         options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
413     }
414     /*
415      * If we do not perform the "inverse BiDi" algorithm, then we
416      * don't need to insert any LRMs, and don't need to test for it.
417      */
418     if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) &&
419        (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT)  &&
420        (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
421        (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) {
422         options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
423     }
424     /*
425      * Iterate through all visual runs and copy the run text segments to
426      * the destination, according to the options.
427      *
428      * The tests for where to insert LRMs ignore the fact that there may be
429      * BN codes or non-BMP code points at the beginning and end of a run;
430      * they may insert LRMs unnecessarily but the tests are faster this way
431      * (this would have to be improved for UTF-8).
432      *
433      * Note that the only errors that are set by doWriteXY() are buffer overflow
434      * errors. Ignore them until the end, and continue for preflighting.
435      */
436     if(!(options&UBIDI_OUTPUT_REVERSE)) {
437         /* forward output */
438         if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
439             /* do not insert BiDi controls */
440             for(run=0; run<runCount; ++run) {
441                 if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
442                     runLength=doWriteForward(text+logicalStart, runLength,
443                                              dest, destSize,
444                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
445                 } else {
446                     runLength=doWriteReverse(text+logicalStart, runLength,
447                                              dest, destSize,
448                                              options, pErrorCode);
449                 }
450                 if(dest!=NULL) {
451                   dest+=runLength;
452                 }
453                 destSize-=runLength;
454             }
455         } else {
456             /* insert BiDi controls for "inverse BiDi" */
457             const DirProp *dirProps=pBiDi->dirProps;
458             const UChar *src;
459             UChar uc;
460             UBiDiDirection dir;
461             int32_t markFlag;
462 
463             for(run=0; run<runCount; ++run) {
464                 dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
465                 src=text+logicalStart;
466                 /* check if something relevant in insertPoints */
467                 markFlag=pBiDi->runs[run].insertRemove;
468                 if(markFlag<0) {        /* BiDi controls count */
469                     markFlag=0;
470                 }
471 
472                 if(UBIDI_LTR==dir) {
473                     if((pBiDi->isInverse) &&
474                        (/*run>0 &&*/ dirProps[logicalStart]!=L)) {
475                         markFlag |= LRM_BEFORE;
476                     }
477                     if (markFlag & LRM_BEFORE) {
478                         uc=LRM_CHAR;
479                     }
480                     else if (markFlag & RLM_BEFORE) {
481                         uc=RLM_CHAR;
482                     }
483                     else  uc=0;
484                     if(uc) {
485                         if(destSize>0) {
486                             *dest++=uc;
487                         }
488                         --destSize;
489                     }
490 
491                     runLength=doWriteForward(src, runLength,
492                                              dest, destSize,
493                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
494                     if(dest!=NULL) {
495                       dest+=runLength;
496                     }
497                     destSize-=runLength;
498 
499                     if((pBiDi->isInverse) &&
500                        (/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L)) {
501                         markFlag |= LRM_AFTER;
502                     }
503                     if (markFlag & LRM_AFTER) {
504                         uc=LRM_CHAR;
505                     }
506                     else if (markFlag & RLM_AFTER) {
507                         uc=RLM_CHAR;
508                     }
509                     else  uc=0;
510                     if(uc) {
511                         if(destSize>0) {
512                             *dest++=uc;
513                         }
514                         --destSize;
515                     }
516                 } else {                /* RTL run */
517                     if((pBiDi->isInverse) &&
518                        (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) {
519                         markFlag |= RLM_BEFORE;
520                     }
521                     if (markFlag & LRM_BEFORE) {
522                         uc=LRM_CHAR;
523                     }
524                     else if (markFlag & RLM_BEFORE) {
525                         uc=RLM_CHAR;
526                     }
527                     else  uc=0;
528                     if(uc) {
529                         if(destSize>0) {
530                             *dest++=uc;
531                         }
532                         --destSize;
533                     }
534 
535                     runLength=doWriteReverse(src, runLength,
536                                              dest, destSize,
537                                              options, pErrorCode);
538                     if(dest!=NULL) {
539                       dest+=runLength;
540                     }
541                     destSize-=runLength;
542 
543                     if((pBiDi->isInverse) &&
544                        (/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart])))) {
545                         markFlag |= RLM_AFTER;
546                     }
547                     if (markFlag & LRM_AFTER) {
548                         uc=LRM_CHAR;
549                     }
550                     else if (markFlag & RLM_AFTER) {
551                         uc=RLM_CHAR;
552                     }
553                     else  uc=0;
554                     if(uc) {
555                         if(destSize>0) {
556                             *dest++=uc;
557                         }
558                         --destSize;
559                     }
560                 }
561             }
562         }
563     } else {
564         /* reverse output */
565         if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
566             /* do not insert BiDi controls */
567             for(run=runCount; --run>=0;) {
568                 if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
569                     runLength=doWriteReverse(text+logicalStart, runLength,
570                                              dest, destSize,
571                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
572                 } else {
573                     runLength=doWriteForward(text+logicalStart, runLength,
574                                              dest, destSize,
575                                              options, pErrorCode);
576                 }
577                 if(dest!=NULL) {
578                   dest+=runLength;
579                 }
580                 destSize-=runLength;
581             }
582         } else {
583             /* insert BiDi controls for "inverse BiDi" */
584             const DirProp *dirProps=pBiDi->dirProps;
585             const UChar *src;
586             UBiDiDirection dir;
587 
588             for(run=runCount; --run>=0;) {
589                 /* reverse output */
590                 dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
591                 src=text+logicalStart;
592 
593                 if(UBIDI_LTR==dir) {
594                     if(/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L) {
595                         if(destSize>0) {
596                             *dest++=LRM_CHAR;
597                         }
598                         --destSize;
599                     }
600 
601                     runLength=doWriteReverse(src, runLength,
602                                              dest, destSize,
603                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
604                     if(dest!=NULL) {
605                       dest+=runLength;
606                     }
607                     destSize-=runLength;
608 
609                     if(/*run>0 &&*/ dirProps[logicalStart]!=L) {
610                         if(destSize>0) {
611                             *dest++=LRM_CHAR;
612                         }
613                         --destSize;
614                     }
615                 } else {
616                     if(/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart]))) {
617                         if(destSize>0) {
618                             *dest++=RLM_CHAR;
619                         }
620                         --destSize;
621                     }
622 
623                     runLength=doWriteForward(src, runLength,
624                                              dest, destSize,
625                                              options, pErrorCode);
626                     if(dest!=NULL) {
627                       dest+=runLength;
628                     }
629                     destSize-=runLength;
630 
631                     if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) {
632                         if(destSize>0) {
633                             *dest++=RLM_CHAR;
634                         }
635                         --destSize;
636                     }
637                 }
638             }
639         }
640     }
641 
642     return u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode);
643 }
644