• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 *******************************************************************************
6 *   Copyright (C) 2001-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *******************************************************************************
9 */
10 
11 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
12  * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
13  * concept of RUNS_ONLY which is a double operation.
14  * It could be advantageous to divide this into 3 concepts:
15  * a) Operation: direct / inverse / RUNS_ONLY
16  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
17  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
18  * This would allow combinations not possible today like RUNS_ONLY with
19  * NUMBERS_SPECIAL.
20  * It would also allow setting INSERT_MARKS for the direct step of RUNS_ONLY
21  * and REMOVE_CONTROLS for the inverse step.
22  * Not all combinations would be supported, and probably not all of them make sense.
23  * The documentation would need to state which ones are supported and what the
24  * fallbacks are for unsupported combinations.
25  */
26 
27 //TODO: make sample program do something simple but real and complete
28 
29 package ohos.global.icu.text;
30 
31 import java.awt.font.NumericShaper;
32 import java.awt.font.TextAttribute;
33 import java.lang.reflect.Array;
34 import java.text.AttributedCharacterIterator;
35 import java.util.Arrays;
36 
37 import ohos.global.icu.impl.UBiDiProps;
38 import ohos.global.icu.lang.UCharacter;
39 import ohos.global.icu.lang.UCharacterDirection;
40 import ohos.global.icu.lang.UProperty;
41 
42 /**
43  *
44  * <h2>Bidi algorithm for ICU</h2>
45  *
46  * This is an implementation of the Unicode Bidirectional Algorithm. The
47  * algorithm is defined in the <a
48  * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.
49  * <p>
50  *
51  * Note: Libraries that perform a bidirectional algorithm and reorder strings
52  * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
53  * shaping (ArabicShaping) classes can be used at the core of such "Storage
54  * Layout Engines".
55  *
56  * <h3>General remarks about the API:</h3>
57  *
58  * The &quot;limit&quot; of a sequence of characters is the position just after
59  * their last character, i.e., one more than that position.
60  * <p>
61  *
62  * Some of the API methods provide access to &quot;runs&quot;. Such a
63  * &quot;run&quot; is defined as a sequence of characters that are at the same
64  * embedding level after performing the Bidi algorithm.
65  *
66  * <h3>Basic concept: paragraph</h3>
67  * A piece of text can be divided into several paragraphs by characters
68  * with the Bidi class <code>Block Separator</code>. For handling of
69  * paragraphs, see:
70  * <ul>
71  * <li>{@link #countParagraphs}
72  * <li>{@link #getParaLevel}
73  * <li>{@link #getParagraph}
74  * <li>{@link #getParagraphByIndex}
75  * </ul>
76  *
77  * <h3>Basic concept: text direction</h3>
78  * The direction of a piece of text may be:
79  * <ul>
80  * <li>{@link #LTR}
81  * <li>{@link #RTL}
82  * <li>{@link #MIXED}
83  * <li>{@link #NEUTRAL}
84  * </ul>
85  *
86  * <h3>Basic concept: levels</h3>
87  *
88  * Levels in this API represent embedding levels according to the Unicode
89  * Bidirectional Algorithm.
90  * Their low-order bit (even/odd value) indicates the visual direction.<p>
91  *
92  * Levels can be abstract values when used for the
93  * <code>paraLevel</code> and <code>embeddingLevels</code>
94  * arguments of <code>setPara()</code>; there:
95  * <ul>
96  * <li>the high-order bit of an <code>embeddingLevels[]</code>
97  * value indicates whether the using application is
98  * specifying the level of a character to <i>override</i> whatever the
99  * Bidi implementation would resolve it to.</li>
100  * <li><code>paraLevel</code> can be set to the
101  * pseudo-level values <code>LEVEL_DEFAULT_LTR</code>
102  * and <code>LEVEL_DEFAULT_RTL</code>.</li>
103  * </ul>
104  *
105  * <p>The related constants are not real, valid level values.
106  * <code>LEVEL_DEFAULT_XXX</code> can be used to specify
107  * a default for the paragraph level, to be used
108  * when the <code>setPara()</code> method
109  * is to determine the level but there is no
110  * strongly typed character in the input.<p>
111  *
112  * Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even
113  * and the one for <code>LEVEL_DEFAULT_RTL</code> is odd,
114  * just like with normal LTR and RTL level values -
115  * these special values are designed that way. Also, the implementation
116  * assumes that MAX_EXPLICIT_LEVEL is odd.
117  *
118  * <p>Note: The numeric values of the related constants will not change:
119  * They are tied to the use of 7-bit byte values (plus the override bit)
120  * and of the byte data type in this API.
121  *
122  * <p><b>See Also:</b>
123  * <ul>
124  * <li>{@link #LEVEL_DEFAULT_LTR}
125  * <li>{@link #LEVEL_DEFAULT_RTL}
126  * <li>{@link #LEVEL_OVERRIDE}
127  * <li>{@link #MAX_EXPLICIT_LEVEL}
128  * <li>{@link #setPara}
129  * </ul>
130  *
131  * <h3>Basic concept: Reordering Mode</h3>
132  * Reordering mode values indicate which variant of the Bidi algorithm to
133  * use.
134  *
135  * <b>See Also:</b>
136  * <ul>
137  * <li>{@link #setReorderingMode}
138  * <li>{@link #REORDER_DEFAULT}
139  * <li>{@link #REORDER_NUMBERS_SPECIAL}
140  * <li>{@link #REORDER_GROUP_NUMBERS_WITH_R}
141  * <li>{@link #REORDER_RUNS_ONLY}
142  * <li>{@link #REORDER_INVERSE_NUMBERS_AS_L}
143  * <li>{@link #REORDER_INVERSE_LIKE_DIRECT}
144  * <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
145  * </ul>
146  *
147  * <h3>Basic concept: Reordering Options</h3>
148  * Reordering options can be applied during Bidi text transformations.
149  *
150  * <b>See Also:</b>
151  * <ul>
152  * <li>{@link #setReorderingOptions}
153  * <li>{@link #OPTION_DEFAULT}
154  * <li>{@link #OPTION_INSERT_MARKS}
155  * <li>{@link #OPTION_REMOVE_CONTROLS}
156  * <li>{@link #OPTION_STREAMING}
157  * </ul>
158  *
159  * <h4> Sample code for the ICU Bidi API </h4>
160  *
161  * <h5>Rendering a paragraph with the ICU Bidi API</h5>
162  *
163  * This is (hypothetical) sample code that illustrates how the ICU Bidi API
164  * could be used to render a paragraph of text. Rendering code depends highly on
165  * the graphics system, therefore this sample code must make a lot of
166  * assumptions, which may or may not match any existing graphics system's
167  * properties.
168  *
169  * <p>
170  * The basic assumptions are:
171  *
172  * <ul>
173  * <li>Rendering is done from left to right on a horizontal line.</li>
174  * <li>A run of single-style, unidirectional text can be rendered at once.
175  * </li>
176  * <li>Such a run of text is passed to the graphics system with characters
177  * (code units) in logical order.</li>
178  * <li>The line-breaking algorithm is very complicated and Locale-dependent -
179  * and therefore its implementation is omitted from this sample code.</li>
180  * </ul>
181  *
182  * <pre>
183  *
184  *  package ohos.global.icu.dev.test.bidi;
185  *
186  *  import ohos.global.icu.text.Bidi;
187  *  import ohos.global.icu.text.BidiRun;
188  *
189  *  public class Sample {
190  *
191  *      static final int styleNormal = 0;
192  *      static final int styleSelected = 1;
193  *      static final int styleBold = 2;
194  *      static final int styleItalics = 4;
195  *      static final int styleSuper=8;
196  *      static final int styleSub = 16;
197  *
198  *      static class StyleRun {
199  *          int limit;
200  *          int style;
201  *
202  *          public StyleRun(int limit, int style) {
203  *              this.limit = limit;
204  *              this.style = style;
205  *          }
206  *      }
207  *
208  *      static class Bounds {
209  *          int start;
210  *          int limit;
211  *
212  *          public Bounds(int start, int limit) {
213  *              this.start = start;
214  *              this.limit = limit;
215  *          }
216  *      }
217  *
218  *      static int getTextWidth(String text, int start, int limit,
219  *                              StyleRun[] styleRuns, int styleRunCount) {
220  *          // simplistic way to compute the width
221  *          return limit - start;
222  *      }
223  *
224  *      // set limit and StyleRun limit for a line
225  *      // from text[start] and from styleRuns[styleRunStart]
226  *      // using Bidi.getLogicalRun(...)
227  *      // returns line width
228  *      static int getLineBreak(String text, Bounds line, Bidi para,
229  *                              StyleRun styleRuns[], Bounds styleRun) {
230  *          // dummy return
231  *          return 0;
232  *      }
233  *
234  *      // render runs on a line sequentially, always from left to right
235  *
236  *      // prepare rendering a new line
237  *      static void startLine(byte textDirection, int lineWidth) {
238  *          System.out.println();
239  *      }
240  *
241  *      // render a run of text and advance to the right by the run width
242  *      // the text[start..limit-1] is always in logical order
243  *      static void renderRun(String text, int start, int limit,
244  *                            byte textDirection, int style) {
245  *      }
246  *
247  *      // We could compute a cross-product
248  *      // from the style runs with the directional runs
249  *      // and then reorder it.
250  *      // Instead, here we iterate over each run type
251  *      // and render the intersections -
252  *      // with shortcuts in simple (and common) cases.
253  *      // renderParagraph() is the main function.
254  *
255  *      // render a directional run with
256  *      // (possibly) multiple style runs intersecting with it
257  *      static void renderDirectionalRun(String text, int start, int limit,
258  *                                       byte direction, StyleRun styleRuns[],
259  *                                       int styleRunCount) {
260  *          int i;
261  *
262  *          // iterate over style runs
263  *          if (direction == Bidi.LTR) {
264  *              int styleLimit;
265  *              for (i = 0; i &lt; styleRunCount; ++i) {
266  *                  styleLimit = styleRuns[i].limit;
267  *                  if (start &lt; styleLimit) {
268  *                      if (styleLimit &gt; limit) {
269  *                          styleLimit = limit;
270  *                      }
271  *                      renderRun(text, start, styleLimit,
272  *                                direction, styleRuns[i].style);
273  *                      if (styleLimit == limit) {
274  *                          break;
275  *                      }
276  *                      start = styleLimit;
277  *                  }
278  *              }
279  *          } else {
280  *              int styleStart;
281  *
282  *              for (i = styleRunCount-1; i &gt;= 0; --i) {
283  *                  if (i &gt; 0) {
284  *                      styleStart = styleRuns[i-1].limit;
285  *                  } else {
286  *                      styleStart = 0;
287  *                  }
288  *                  if (limit &gt;= styleStart) {
289  *                      if (styleStart &lt; start) {
290  *                          styleStart = start;
291  *                      }
292  *                      renderRun(text, styleStart, limit, direction,
293  *                                styleRuns[i].style);
294  *                      if (styleStart == start) {
295  *                          break;
296  *                      }
297  *                      limit = styleStart;
298  *                  }
299  *              }
300  *          }
301  *      }
302  *
303  *      // the line object represents text[start..limit-1]
304  *      static void renderLine(Bidi line, String text, int start, int limit,
305  *                             StyleRun styleRuns[], int styleRunCount) {
306  *          byte direction = line.getDirection();
307  *          if (direction != Bidi.MIXED) {
308  *              // unidirectional
309  *              if (styleRunCount &lt;= 1) {
310  *                  renderRun(text, start, limit, direction, styleRuns[0].style);
311  *              } else {
312  *                  renderDirectionalRun(text, start, limit, direction,
313  *                                       styleRuns, styleRunCount);
314  *              }
315  *          } else {
316  *              // mixed-directional
317  *              int count, i;
318  *              BidiRun run;
319  *
320  *              try {
321  *                  count = line.countRuns();
322  *              } catch (IllegalStateException e) {
323  *                  e.printStackTrace();
324  *                  return;
325  *              }
326  *              if (styleRunCount &lt;= 1) {
327  *                  int style = styleRuns[0].style;
328  *
329  *                  // iterate over directional runs
330  *                  for (i = 0; i &lt; count; ++i) {
331  *                      run = line.getVisualRun(i);
332  *                      renderRun(text, run.getStart(), run.getLimit(),
333  *                                run.getDirection(), style);
334  *                  }
335  *              } else {
336  *                  // iterate over both directional and style runs
337  *                  for (i = 0; i &lt; count; ++i) {
338  *                      run = line.getVisualRun(i);
339  *                      renderDirectionalRun(text, run.getStart(),
340  *                                           run.getLimit(), run.getDirection(),
341  *                                           styleRuns, styleRunCount);
342  *                  }
343  *              }
344  *          }
345  *      }
346  *
347  *      static void renderParagraph(String text, byte textDirection,
348  *                                  StyleRun styleRuns[], int styleRunCount,
349  *                                  int lineWidth) {
350  *          int length = text.length();
351  *          Bidi para = new Bidi();
352  *          try {
353  *              para.setPara(text,
354  *                           textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
355  *                                              : Bidi.LEVEL_DEFAULT_LTR,
356  *                           null);
357  *          } catch (Exception e) {
358  *              e.printStackTrace();
359  *              return;
360  *          }
361  *          byte paraLevel = (byte)(1 &amp; para.getParaLevel());
362  *          StyleRun styleRun = new StyleRun(length, styleNormal);
363  *
364  *          if (styleRuns == null || styleRunCount &lt;= 0) {
365  *              styleRuns = new StyleRun[1];
366  *              styleRunCount = 1;
367  *              styleRuns[0] = styleRun;
368  *          }
369  *          // assume styleRuns[styleRunCount-1].limit&gt;=length
370  *
371  *          int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
372  *          if (width &lt;= lineWidth) {
373  *              // everything fits onto one line
374  *
375  *              // prepare rendering a new line from either left or right
376  *              startLine(paraLevel, width);
377  *
378  *              renderLine(para, text, 0, length, styleRuns, styleRunCount);
379  *          } else {
380  *              // we need to render several lines
381  *              Bidi line = new Bidi(length, 0);
382  *              int start = 0, limit;
383  *              int styleRunStart = 0, styleRunLimit;
384  *
385  *              for (;;) {
386  *                  limit = length;
387  *                  styleRunLimit = styleRunCount;
388  *                  width = getLineBreak(text, new Bounds(start, limit),
389  *                                       para, styleRuns,
390  *                                       new Bounds(styleRunStart, styleRunLimit));
391  *                  try {
392  *                      line = para.setLine(start, limit);
393  *                  } catch (Exception e) {
394  *                      e.printStackTrace();
395  *                      return;
396  *                  }
397  *                  // prepare rendering a new line
398  *                  // from either left or right
399  *                  startLine(paraLevel, width);
400  *
401  *                  if (styleRunStart &gt; 0) {
402  *                      int newRunCount = styleRuns.length - styleRunStart;
403  *                      StyleRun[] newRuns = new StyleRun[newRunCount];
404  *                      System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
405  *                                       newRunCount);
406  *                      renderLine(line, text, start, limit, newRuns,
407  *                                 styleRunLimit - styleRunStart);
408  *                  } else {
409  *                      renderLine(line, text, start, limit, styleRuns,
410  *                                 styleRunLimit - styleRunStart);
411  *                  }
412  *                  if (limit == length) {
413  *                      break;
414  *                  }
415  *                  start = limit;
416  *                  styleRunStart = styleRunLimit - 1;
417  *                  if (start &gt;= styleRuns[styleRunStart].limit) {
418  *                      ++styleRunStart;
419  *                  }
420  *              }
421  *          }
422  *      }
423  *
424  *      public static void main(String[] args)
425  *      {
426  *          renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
427  *          renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
428  *      }
429  *  }
430  *
431  * </pre>
432  *
433  * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)
434  */
435 
436 /*
437  * General implementation notes:
438  *
439  * Throughout the implementation, there are comments like (W2) that refer to
440  * rules of the BiDi algorithm, in this example to the second rule of the
441  * resolution of weak types.
442  *
443  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
444  * character according to UTF-16, the second UChar gets the directional property of
445  * the entire character assigned, while the first one gets a BN, a boundary
446  * neutral, type, which is ignored by most of the algorithm according to
447  * rule (X9) and the implementation suggestions of the BiDi algorithm.
448  *
449  * Later, adjustWSLevels() will set the level for each BN to that of the
450  * following character (UChar), which results in surrogate pairs getting the
451  * same level on each of their surrogates.
452  *
453  * In a UTF-8 implementation, the same thing could be done: the last byte of
454  * a multi-byte sequence would get the "real" property, while all previous
455  * bytes of that sequence would get BN.
456  *
457  * It is not possible to assign all those parts of a character the same real
458  * property because this would fail in the resolution of weak types with rules
459  * that look at immediately surrounding types.
460  *
461  * As a related topic, this implementation does not remove Boundary Neutral
462  * types from the input, but ignores them wherever this is relevant.
463  * For example, the loop for the resolution of the weak types reads
464  * types until it finds a non-BN.
465  * Also, explicit embedding codes are neither changed into BN nor removed.
466  * They are only treated the same way real BNs are.
467  * As stated before, adjustWSLevels() takes care of them at the end.
468  * For the purpose of conformance, the levels of all these codes
469  * do not matter.
470  *
471  * Note that this implementation modifies the dirProps
472  * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
473  * X6, N0 (replace paired brackets by L or R).
474  *
475  * In this implementation, the resolution of weak types (W1 to W6),
476  * neutrals (N1 and N2), and the assignment of the resolved level (In)
477  * are all done in one single loop, in resolveImplicitLevels().
478  * Changes of dirProp values are done on the fly, without writing
479  * them back to the dirProps array.
480  *
481  *
482  * This implementation contains code that allows bypassing steps of the
483  * algorithm that are not needed on the specific paragraph
484  * in order to speed up the most common cases considerably,
485  * like text that is entirely LTR, or RTL text without numbers.
486  *
487  * Most of this is done by setting a bit for each directional property
488  * in a flags variable and later checking for whether there are
489  * any LTR characters or any RTL characters, or both, whether
490  * there are any explicit embedding codes, etc.
491  *
492  * If the (Xn) steps are performed, then the flags are re-evaluated,
493  * because they will then not contain the embedding codes any more
494  * and will be adjusted for override codes, so that subsequently
495  * more bypassing may be possible than what the initial flags suggested.
496  *
497  * If the text is not mixed-directional, then the
498  * algorithm steps for the weak type resolution are not performed,
499  * and all levels are set to the paragraph level.
500  *
501  * If there are no explicit embedding codes, then the (Xn) steps
502  * are not performed.
503  *
504  * If embedding levels are supplied as a parameter, then all
505  * explicit embedding codes are ignored, and the (Xn) steps
506  * are not performed.
507  *
508  * White Space types could get the level of the run they belong to,
509  * and are checked with a test of (flags&MASK_EMBEDDING) to
510  * decide whether the paragraph direction should be taken into account in
511  * the flags variable.
512  *
513  * If there are no White Space types in the paragraph, then
514  * (L1) is not necessary in adjustWSLevels().
515  */
516 
517 public class Bidi {
518 
519     static class Point {    /* one entry of InsertPoints.points: a text position plus a flag */
520         int pos;    /* position in text */
521         int flag;   /* flag for LRM/RLM, before/after; NOTE(review): presumably "before/after" is relative to pos -- confirm at the use sites */
522     }
523 
524     static class InsertPoints {    /* holder for an array of Point entries plus two counters */
525         int size;    /* NOTE(review): presumably the number of entries of points in use -- confirm at the use sites */
526         int confirmed;    /* NOTE(review): presumably a count of entries that are already settled -- confirm at the use sites */
527         Point[] points = new Point[0];    /* the Point entries; allocated with length 0 */
528     }
529 
530     static class Opening {    /* record kept for one opening bracket; BracketData.openings holds an array of these */
531         int   position;                 /* position of the opening bracket */
532         int   match;                    /* matching char, or minus the position of the closing bracket */
533         int   contextPos;               /* position of the last strong char found before the opening bracket */
534         short flags;                    /* bits for L or R/AL found within the bracket pair */
535         byte  contextDir;               /* L or R, according to the last strong char before the opening bracket */
536     }
537 
538     static class IsoRun {    /* record kept for one isolated sequence; BracketData.isoRuns holds an array of these */
539         int   contextPos;               /* position of the char determining the context */
540         short start;                    /* index of the first opening entry for this run (presumably into BracketData.openings -- confirm) */
541         short limit;                    /* index after the last opening entry for this run */
542         byte  level;                    /* level of this run */
543         byte  lastStrong;               /* bidi class of the last strong char found in this run */
544         byte  lastBase;                 /* bidi class of the last base char found in this run */
545         byte  contextDir;               /* L or R to use as context for the following openings */
546     }
547 
548     static class BracketData {    /* bracket bookkeeping: the Opening entries plus the nested IsoRun entries */
549         Opening[] openings = new Opening[SIMPLE_OPENINGS_COUNT];    /* Opening entries; allocated with SIMPLE_OPENINGS_COUNT slots */
550         int   isoRunLast;               /* index of last used entry (of isoRuns, going by the name) */
551         /* array of nested isolated sequence entries; the nesting can never exceed MAX_EXPLICIT_LEVEL,
552            + 1 for index 0, + 1 for before the first isolated sequence */
553         IsoRun[]  isoRuns = new IsoRun[MAX_EXPLICIT_LEVEL+2];
554         boolean   isNumbersSpecial;     /* flag for reordering mode NUMBERS_SPECIAL */
555     }
556 
557     static class Isolate {    /* NOTE(review): judging by the field names, saved positions and states for one isolate -- confirm at the use sites */
558         int   startON;    /* NOTE(review): presumably a start position related to ON (other neutral) chars -- confirm */
559         int   start1;     /* NOTE(review): meaning not visible in this part of the file -- confirm */
560         short stateImp;   /* NOTE(review): presumably a saved state of the implicit-level resolution -- confirm */
561         short state;      /* NOTE(review): presumably a saved state value -- confirm */
562     }
563 
564     /** Paragraph level setting<p>
565      *
566      * Constant indicating that the base direction depends on the first strong
567      * directional character in the text according to the Unicode Bidirectional
568      * Algorithm. If no strong directional character is present,
569      * then set the paragraph level to 0 (left-to-right).<p>
570      *
571      * If this value is used in conjunction with reordering modes
572      * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
573      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
574      * is assumed to be visual LTR, and the text after reordering is required
575      * to be the corresponding logical string with appropriate contextual
576      * direction. The direction of the result string will be RTL if either
577      * the rightmost or leftmost strong character of the source text is RTL
578      * or Arabic Letter; the direction will be LTR otherwise.<p>
579      *
580      * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
581      * be added at the beginning of the result string to ensure round trip
582      * (that the result string, when reordered back to visual, will produce
583      * the original source text).
584      * @see #REORDER_INVERSE_LIKE_DIRECT
585      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
586      */
587     public static final byte LEVEL_DEFAULT_LTR = (byte)0x7e;
588 
589     /** Paragraph level setting<p>
590      *
591      * Constant indicating that the base direction depends on the first strong
592      * directional character in the text according to the Unicode Bidirectional
593      * Algorithm. If no strong directional character is present,
594      * then set the paragraph level to 1 (right-to-left).<p>
595      *
596      * If this value is used in conjunction with reordering modes
597      * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
598      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
599      * is assumed to be visual LTR, and the text after reordering is required
600      * to be the corresponding logical string with appropriate contextual
601      * direction. The direction of the result string will be RTL if either
602      * the rightmost or leftmost strong character of the source text is RTL
603      * or Arabic Letter, or if the text contains no strong character;
604      * the direction will be LTR otherwise.<p>
605      *
606      * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
607      * be added at the beginning of the result string to ensure round trip
608      * (that the result string, when reordered back to visual, will produce
609      * the original source text).
610      * @see #REORDER_INVERSE_LIKE_DIRECT
611      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
612      */
613     public static final byte LEVEL_DEFAULT_RTL = (byte)0x7f;
614 
615     /**
616      * Maximum explicit embedding level.
617      * Same as the max_depth value in the
618      * <a href="http://www.unicode.org/reports/tr9/#BD2">Unicode Bidirectional Algorithm</a>.
619      * (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
620      */
621     public static final byte MAX_EXPLICIT_LEVEL = 125;
622 
623     /**
624      * Bit flag for level input.
625      * Overrides directional properties.
626      */
627     public static final byte LEVEL_OVERRIDE = (byte)0x80;
628 
629     /**
630      * Special value which can be returned by the mapping methods when a
631      * logical index has no corresponding visual index or vice-versa. This may
632      * happen for the logical-to-visual mapping of a Bidi control when option
633      * <code>OPTION_REMOVE_CONTROLS</code> is
634      * specified. This can also happen for the visual-to-logical mapping of a
635      * Bidi mark (LRM or RLM) inserted by option
636      * <code>OPTION_INSERT_MARKS</code>.
637      * @see #getVisualIndex
638      * @see #getVisualMap
639      * @see #getLogicalIndex
640      * @see #getLogicalMap
641      * @see #OPTION_INSERT_MARKS
642      * @see #OPTION_REMOVE_CONTROLS
643      */
644     public static final int MAP_NOWHERE = -1;
645 
646     /**
647      * Left-to-right text.
648      * <ul>
649      * <li>As return value for <code>getDirection()</code>, it means
650      *     that the source string contains no right-to-left characters, or
651      *     that the source string is empty and the paragraph level is even.
652      * <li>As return value for <code>getBaseDirection()</code>, it
653      *     means that the first strong character of the source string has
654      *     a left-to-right direction.
655      * </ul>
656      */
657     public static final byte LTR = 0;
658 
659     /**
660      * Right-to-left text.
661      * <ul>
662      * <li>As return value for <code>getDirection()</code>, it means
663      *     that the source string contains no left-to-right characters, or
664      *     that the source string is empty and the paragraph level is odd.
665      * <li>As return value for <code>getBaseDirection()</code>, it
666      *     means that the first strong character of the source string has
667      *     a right-to-left direction.
668      * </ul>
669      */
670     public static final byte RTL = 1;
671 
672     /**
673      * Mixed-directional text.
674      * <p>As return value for <code>getDirection()</code>, it means
675      *    that the source string contains both left-to-right and
676      *    right-to-left characters.
677      */
678     public static final byte MIXED = 2;
679 
680     /**
681      * No strongly directional text.
682      * <p>As return value for <code>getBaseDirection()</code>, it means
683      *    that the source string is missing or empty, or contains neither
684      *    left-to-right nor right-to-left characters.
685      */
686     public static final byte NEUTRAL = 3;
687 
688     /**
689      * option bit for writeReordered():
690      * keep combining characters after their base characters in RTL runs
691      *
692      * @see #writeReordered
693      */
694     public static final short KEEP_BASE_COMBINING = 1;
695 
696     /**
697      * option bit for writeReordered():
698      * replace characters with the "mirrored" property in RTL runs
699      * by their mirror-image mappings
700      *
701      * @see #writeReordered
702      */
703     public static final short DO_MIRRORING = 2;
704 
705     /**
706      * option bit for writeReordered():
707      * surround the run with LRMs if necessary;
708      * this is part of the approximate "inverse Bidi" algorithm
709      *
710      * <p>This option does not imply corresponding adjustment of the index
711      * mappings.
712      *
713      * @see #setInverse
714      * @see #writeReordered
715      */
716     public static final short INSERT_LRM_FOR_NUMERIC = 4;
717 
718     /**
719      * option bit for writeReordered():
720      * remove Bidi control characters
721      * (this does not affect INSERT_LRM_FOR_NUMERIC)
722      *
723      * <p>This option does not imply corresponding adjustment of the index
724      * mappings.
725      *
726      * @see #writeReordered
727      * @see #INSERT_LRM_FOR_NUMERIC
728      */
729     public static final short REMOVE_BIDI_CONTROLS = 8;
730 
731     /**
732      * option bit for writeReordered():
733      * write the output in reverse order
734      *
735      * <p>This has the same effect as calling <code>writeReordered()</code>
736      * first without this option, and then calling
737      * <code>writeReverse()</code> without mirroring.
738      * Doing this in the same step is faster and avoids a temporary buffer.
739      * An example for using this option is output to a character terminal that
740      * is designed for RTL scripts and stores text in reverse order.
741      *
742      * @see #writeReordered
743      */
744     public static final short OUTPUT_REVERSE = 16;
745 
746     /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
747      * @see #setReorderingMode
748      */
749     public static final short REORDER_DEFAULT = 0;
750 
751     /** Reordering mode: Logical to Visual algorithm which handles numbers in
752      * a way which mimics the behavior of Windows XP.
753      * @see #setReorderingMode
754      */
755     public static final short REORDER_NUMBERS_SPECIAL = 1;
756 
757     /** Reordering mode: Logical to Visual algorithm grouping numbers with
758      * adjacent R characters (reversible algorithm).
759      * @see #setReorderingMode
760      */
761     public static final short REORDER_GROUP_NUMBERS_WITH_R = 2;
762 
763     /** Reordering mode: Reorder runs only to transform a Logical LTR string
764      * to the logical RTL string with the same display, or vice-versa.<br>
765      * If this mode is set together with option
766      * <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source
767      * text may be removed and other controls may be added to produce the
768      * minimum combination which has the required display.
769      * @see #OPTION_INSERT_MARKS
770      * @see #setReorderingMode
771      */
772     public static final short REORDER_RUNS_ONLY = 3;
773 
774     /** Reordering mode: Visual to Logical algorithm which handles numbers
775      * like L (same algorithm as selected by <code>setInverse(true)</code>).
776      * @see #setInverse
777      * @see #setReorderingMode
778      */
779     public static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
780 
781     /** Reordering mode: Visual to Logical algorithm equivalent to the regular
782      * Logical to Visual algorithm.
783      * @see #setReorderingMode
784      */
785     public static final short REORDER_INVERSE_LIKE_DIRECT = 5;
786 
787     /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
788      * <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
789      * @see #setReorderingMode
790      */
791     public static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
792 
793     /*  Number of values for reordering mode. */
794     static final short REORDER_COUNT = 7;
795 
796     /* Reordering mode values must be ordered so that all the regular logical to
797      * visual modes come first, and all inverse Bidi modes come last.
798      */
799     static final short REORDER_LAST_LOGICAL_TO_VISUAL =
800             REORDER_NUMBERS_SPECIAL;
801 
802     /**
803      * Option value for <code>setReorderingOptions</code>:
804      * disable all the options which can be set with this method
805      * @see #setReorderingOptions
806      */
807     public static final int OPTION_DEFAULT = 0;
808 
809     /**
810      * Option bit for <code>setReorderingOptions</code>:
811      * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
812      * a reordering to a Logical order
813      *
814      * <p>This option must be set or reset before calling
815      * <code>setPara</code>.
816      *
817      * <p>This option is significant only with reordering modes which generate
818      * a result with Logical order, specifically:
819      * <ul>
820      *   <li><code>REORDER_RUNS_ONLY</code></li>
821      *   <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li>
822      *   <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li>
823      *   <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
824      * </ul>
825      *
826      * <p>If this option is set in conjunction with reordering mode
827      * <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
828      * <code>setInverse(true)</code>, it implies option
829      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
830      * <code>writeReordered()</code>.
831      *
832      * <p>For other reordering modes, a minimum number of LRM or RLM characters
833      * will be added to the source text after reordering it so as to ensure
834      * round trip, i.e. when applying the inverse reordering mode on the
835      * resulting logical text with removal of Bidi marks
836      * (option <code>OPTION_REMOVE_CONTROLS</code> set before calling
837      * <code>setPara()</code> or option
838      * <code>REMOVE_BIDI_CONTROLS</code> in
839      * <code>writeReordered</code>), the result will be identical to the
840      * source text in the first transformation.
841      *
842      * <p>This option will be ignored if specified together with option
843      * <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option
844      * <code>REMOVE_BIDI_CONTROLS</code> in calls to method
845      * <code>writeReordered()</code> and it implies option
846      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
847      * <code>writeReordered()</code> if the reordering mode is
848      * <code>REORDER_INVERSE_NUMBERS_AS_L</code>.
849      *
850      * @see #setReorderingMode
851      * @see #setReorderingOptions
852      * @see #INSERT_LRM_FOR_NUMERIC
853      * @see #REMOVE_BIDI_CONTROLS
854      * @see #OPTION_REMOVE_CONTROLS
855      * @see #REORDER_RUNS_ONLY
856      * @see #REORDER_INVERSE_NUMBERS_AS_L
857      * @see #REORDER_INVERSE_LIKE_DIRECT
858      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
859      */
860     public static final int OPTION_INSERT_MARKS = 1;
861 
862     /**
863      * Option bit for <code>setReorderingOptions</code>:
864      * remove Bidi control characters
865      *
866      * <p>This option must be set or reset before calling
867      * <code>setPara</code>.
868      *
869      * <p>This option nullifies option
870      * <code>OPTION_INSERT_MARKS</code>. It inhibits option
871      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
872      * <code>writeReordered()</code> and it implies option
873      * <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.
874      *
875      * @see #setReorderingMode
876      * @see #setReorderingOptions
877      * @see #OPTION_INSERT_MARKS
878      * @see #INSERT_LRM_FOR_NUMERIC
879      * @see #REMOVE_BIDI_CONTROLS
880      */
881     public static final int OPTION_REMOVE_CONTROLS = 2;
882 
883     /**
884      * Option bit for <code>setReorderingOptions</code>:
885      * process the output as part of a stream to be continued
886      *
887      * <p>This option must be set or reset before calling
888      * <code>setPara</code>.
889      *
890      * <p>This option specifies that the caller is interested in processing
891      * a large text object in parts. The results of the successive calls are
892      * expected to be concatenated by the caller. Only the call for the last
893      * part will have this option bit off.
894      *
895      * <p>When this option bit is on, <code>setPara()</code> may process
896      * less than the full source text in order to truncate the text at a
897      * meaningful boundary. The caller should call
898      * <code>getProcessedLength()</code> immediately after calling
899      * <code>setPara()</code> in order to determine how much of the source
900      * text has been processed. Source text beyond that length should be
901      * resubmitted in following calls to <code>setPara</code>. The
902      * processed length may be less than the length of the source text if a
903      * character preceding the last character of the source text constitutes a
904      * reasonable boundary (like a block separator) for text to be continued.<br>
905      * If the last character of the source text constitutes a reasonable
906      * boundary, the whole text will be processed at once.<br>
907      * If nowhere in the source text there exists
908      * such a reasonable boundary, the processed length will be zero.<br>
909      * The caller should check for such an occurrence and do one of the following:
910      * <ul><li>submit a larger amount of text with a better chance to include
911      *         a reasonable boundary.</li>
912      *     <li>resubmit the same text after turning off option
913      *         <code>OPTION_STREAMING</code>.</li></ul>
914      * In all cases, this option should be turned off before processing the last
915      * part of the text.
916      *
917      * <p>When the <code>OPTION_STREAMING</code> option is used, it is
918      * recommended to call <code>orderParagraphsLTR(true)</code> before calling
919      * <code>setPara()</code> so that later paragraphs may be concatenated to
920      * previous paragraphs on the right.
921      *
922      * @see #setReorderingMode
923      * @see #setReorderingOptions
924      * @see #getProcessedLength
925      */
926     public static final int OPTION_STREAMING = 4;
927 
928     /*
929      *   Comparing the description of the Bidi algorithm with this implementation
930      *   is easier with the same names for the Bidi types in the code as there.
931      *   See UCharacterDirection
932      */
933     static final byte L   = UCharacterDirection.LEFT_TO_RIGHT;                  /*  0 */
934     static final byte R   = UCharacterDirection.RIGHT_TO_LEFT;                  /*  1 */
935     static final byte EN  = UCharacterDirection.EUROPEAN_NUMBER;                /*  2 */
936     static final byte ES  = UCharacterDirection.EUROPEAN_NUMBER_SEPARATOR;      /*  3 */
937     static final byte ET  = UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR;     /*  4 */
938     static final byte AN  = UCharacterDirection.ARABIC_NUMBER;                  /*  5 */
939     static final byte CS  = UCharacterDirection.COMMON_NUMBER_SEPARATOR;        /*  6 */
940     static final byte B   = UCharacterDirection.BLOCK_SEPARATOR;                /*  7 */
941     static final byte S   = UCharacterDirection.SEGMENT_SEPARATOR;              /*  8 */
942     static final byte WS  = UCharacterDirection.WHITE_SPACE_NEUTRAL;            /*  9 */
943     static final byte ON  = UCharacterDirection.OTHER_NEUTRAL;                  /* 10 */
944     static final byte LRE = UCharacterDirection.LEFT_TO_RIGHT_EMBEDDING;        /* 11 */
945     static final byte LRO = UCharacterDirection.LEFT_TO_RIGHT_OVERRIDE;         /* 12 */
946     static final byte AL  = UCharacterDirection.RIGHT_TO_LEFT_ARABIC;           /* 13 */
947     static final byte RLE = UCharacterDirection.RIGHT_TO_LEFT_EMBEDDING;        /* 14 */
948     static final byte RLO = UCharacterDirection.RIGHT_TO_LEFT_OVERRIDE;         /* 15 */
949     static final byte PDF = UCharacterDirection.POP_DIRECTIONAL_FORMAT;         /* 16 */
950     static final byte NSM = UCharacterDirection.DIR_NON_SPACING_MARK;           /* 17 */
951     static final byte BN  = UCharacterDirection.BOUNDARY_NEUTRAL;               /* 18 */
952     static final byte FSI = UCharacterDirection.FIRST_STRONG_ISOLATE;           /* 19 */
953     static final byte LRI = UCharacterDirection.LEFT_TO_RIGHT_ISOLATE;          /* 20 */
954     static final byte RLI = UCharacterDirection.RIGHT_TO_LEFT_ISOLATE;          /* 21 */
955     static final byte PDI = UCharacterDirection.POP_DIRECTIONAL_ISOLATE;        /* 22 */
956     static final byte ENL = PDI + 1;    /* EN after W7 */                       /* 23 */
957     static final byte ENR = ENL + 1;    /* EN not subject to W7 */              /* 24 */
958 
959     /**
960      * Value returned by <code>BidiClassifier</code> when there is no need to
961      * override the standard Bidi class for a given code point.
962      *
963      * <p>This constant is deprecated; use UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)+1 instead.
964      *
965      * @see BidiClassifier
966      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
967      * @hide deprecated on icu4j-org
968      */
969     @Deprecated
970     public static final int CLASS_DEFAULT = UCharacterDirection.CHAR_DIRECTION_COUNT;
971 
972     /* number of paras entries allocated initially */
973     static final int SIMPLE_PARAS_COUNT = 10;
974     /* number of isolate run entries for paired brackets allocated initially */
975     static final int SIMPLE_OPENINGS_COUNT = 20;
976 
977     private static final char CR = '\r';
978     private static final char LF = '\n';
979 
980     static final int LRM_BEFORE = 1;
981     static final int LRM_AFTER = 2;
982     static final int RLM_BEFORE = 4;
983     static final int RLM_AFTER = 8;
984 
985     /* flags for Opening.flags */
986     static final byte FOUND_L = (byte)DirPropFlag(L);
987     static final byte FOUND_R = (byte)DirPropFlag(R);
988 
989     /*
990      * The following bit is used for the directional isolate status.
991      * Stack entries corresponding to isolate sequences are greater than ISOLATE.
992      */
993     static final int ISOLATE = 0x0100;
994 
995 
996     /*
997      * reference to parent paragraph object (reference to self if this object is
998      * a paragraph object); set to null in a newly opened object; set to a
999      * real value after a successful execution of setPara or setLine
1000      */
1001     Bidi                paraBidi;
1002 
1003     final UBiDiProps    bdp;
1004 
1005     /* character array representing the current text */
1006     char[]              text;
1007 
1008     /* length of the current text */
1009     int                 originalLength;
1010 
1011     /* if the option OPTION_STREAMING is set, this is the length of
1012      * text actually processed by <code>setPara</code>, which may be shorter
1013      * than the original length. Otherwise, it is identical to the original
1014      * length.
1015      */
1016     int                 length;
1017 
1018     /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi
1019      * marks are allowed to be inserted in one of the reordering modes, the
1020      * length of the result string may be different from the processed length.
1021      */
1022     int                 resultLength;
1023 
1024     /* indicators for whether memory may be allocated after construction */
1025     boolean             mayAllocateText;
1026     boolean             mayAllocateRuns;
1027 
1028     /* arrays with one value per text-character */
1029     byte[]              dirPropsMemory = new byte[1];
1030     byte[]              levelsMemory = new byte[1];
1031     byte[]              dirProps;
1032     byte[]              levels;
1033 
1034     /* are we performing an approximation of the "inverse Bidi" algorithm? */
1035     boolean             isInverse;
1036 
1037     /* are we using the basic algorithm or its variation? */
1038     int                 reorderingMode;
1039 
1040     /* bitmask for reordering options */
1041     int                 reorderingOptions;
1042 
1043     /* must block separators receive level 0? */
1044     boolean             orderParagraphsLTR;
1045 
1046     /* the paragraph level */
1047     byte                paraLevel;
1048     /* original paraLevel when contextual */
1049     /* must be one of DEFAULT_xxx or 0 if not contextual */
1050     byte                defaultParaLevel;
1051 
1052     /* context data */
1053     String              prologue;
1054     String              epilogue;
1055 
1056     /* the following is set in setPara, used in processPropertySeq */
1057 
1058     ImpTabPair          impTabPair;  /* reference to levels state table pair */
1059     /* the overall paragraph or line directionality*/
1060     byte                direction;
1061 
1062     /* flags is a bit set for which directional properties are in the text */
1063     int                 flags;
1064 
1065     /* lastArabicPos is index to the last AL in the text, -1 if none */
1066     int                 lastArabicPos;
1067 
1068     /* characters after trailingWSStart are WS and are */
1069     /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
1070     int                 trailingWSStart;
1071 
1072     /* fields for paragraph handling, set in getDirProps() */
1073     int                 paraCount;
1074     int[]               paras_limit = new int[SIMPLE_PARAS_COUNT];
1075     byte[]              paras_level = new byte[SIMPLE_PARAS_COUNT];
1076 
1077     /* fields for line reordering */
1078     int                 runCount;     /* ==-1: runs not set up yet */
1079     BidiRun[]           runsMemory = new BidiRun[0];
1080     BidiRun[]           runs;
1081 
1082     /* for non-mixed text, we only need a tiny array of runs (no allocation) */
1083     BidiRun[]           simpleRuns = {new BidiRun()};
1084 
1085     /* fields for managing isolate sequences */
1086     Isolate[]           isolates;
1087     /* maximum or current nesting depth of isolate sequences */
1088     /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
1089        nesting encountered.
1090        Within resolveImplicitLevels(), this is the index of the current isolates
1091        stack entry. */
1092     int                 isolateCount;
1093 
1094     /* mapping of runs in logical order to visual order */
1095     int[]               logicalToVisualRunsMap;
1096     /* flag to indicate that the map has been updated */
1097     boolean             isGoodLogicalToVisualRunsMap;
1098 
1099     /* customized class provider */
1100     BidiClassifier      customClassifier = null;
1101 
1102     /* for inverse Bidi with insertion of directional marks */
1103     InsertPoints        insertPoints = new InsertPoints();
1104 
1105     /* for option OPTION_REMOVE_CONTROLS */
1106     int                 controlCount;
1107 
1108     /*
1109      * Sometimes, bit values are more appropriate
1110      * to deal with directionality properties.
1111      * Abbreviations in these method names refer to names
1112      * used in the Bidi algorithm.
1113      */
DirPropFlag(byte dir)1114     static int DirPropFlag(byte dir) {
1115         return (1 << dir);
1116     }
1117 
testDirPropFlagAt(int flag, int index)1118     boolean testDirPropFlagAt(int flag, int index) {
1119         return ((DirPropFlag(dirProps[index]) & flag) != 0);
1120     }
1121 
1122     static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
1123 
1124     /* to avoid some conditional statements, use tiny constant arrays */
1125     static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };
1126     static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };
1127     static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
1128 
DirPropFlagLR(byte level)1129     static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
DirPropFlagE(byte level)1130     static final int DirPropFlagE(byte level)  { return DirPropFlagE[level & 1]; }
DirPropFlagO(byte level)1131     static final int DirPropFlagO(byte level)  { return DirPropFlagO[level & 1]; }
DirFromStrong(byte strong)1132     static final byte DirFromStrong(byte strong) { return strong == L ? L : R; }
NoOverride(byte level)1133     static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); }
1134 
1135     /*  are there any characters that are LTR or RTL? */
1136     static final int MASK_LTR =
1137         DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI);
1138     static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI);
1139 
1140     static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL);
1141     static final int MASK_STRONG_EN_AN = DirPropFlag(L)|DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(EN)|DirPropFlag(AN);
1142     /* explicit embedding codes */
1143     static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF);
1144     static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
1145 
1146     /* explicit isolate codes */
1147     static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI);
1148 
1149     /* paragraph and segment separators */
1150     static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);
1151 
1152     /* all types that are counted as White Space or Neutral in some steps */
1153     static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT|MASK_ISO;
1154 
1155     /* types that are neutrals or could become neutrals in (Wn) */
1156     static final int MASK_POSSIBLE_N = DirPropFlag(ON)|DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_WS;
1157 
1158     /*
1159      * These types may be changed to "e",
1160      * the embedding type (L or R) of the run,
1161      * in the Bidi algorithm (N2)
1162      */
1163     static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;
1164 
1165     /*
1166      *  the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java
1167      */
GetLRFromLevel(byte level)1168     static byte GetLRFromLevel(byte level)
1169     {
1170         return (byte)(level & 1);
1171     }
1172 
IsDefaultLevel(byte level)1173     static boolean IsDefaultLevel(byte level)
1174     {
1175         return ((level & LEVEL_DEFAULT_LTR) == LEVEL_DEFAULT_LTR);
1176     }
1177 
IsBidiControlChar(int c)1178     static boolean IsBidiControlChar(int c)
1179     {
1180         /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
1181                            0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
1182         return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))
1183                                              || ((c >= 0x2066) && (c <= 0x2069)));
1184     }
1185 
verifyValidPara()1186     void verifyValidPara()
1187     {
1188         if (!(this == this.paraBidi)) {
1189             throw new IllegalStateException();
1190         }
1191     }
1192 
verifyValidParaOrLine()1193     void verifyValidParaOrLine()
1194     {
1195         Bidi para = this.paraBidi;
1196         /* verify Para */
1197         if (this == para) {
1198             return;
1199         }
1200         /* verify Line */
1201         if ((para == null) || (para != para.paraBidi)) {
1202             throw new IllegalStateException();
1203         }
1204     }
1205 
verifyRange(int index, int start, int limit)1206     void verifyRange(int index, int start, int limit)
1207     {
1208         if (index < start || index >= limit) {
1209             throw new IllegalArgumentException("Value " + index +
1210                       " is out of range " + start + " to " + limit);
1211         }
1212     }
1213 
1214     /**
1215      * Allocate a <code>Bidi</code> object.
1216      * Such an object is initially empty. It is assigned
1217      * the Bidi properties of a piece of text containing one or more paragraphs
1218      * by <code>setPara()</code>
1219      * or the Bidi properties of a line within a paragraph by
1220      * <code>setLine()</code>.<p>
1221      * This object can be reused.<p>
1222      * <code>setPara()</code> and <code>setLine()</code> will allocate
1223      * additional memory for internal structures as necessary.
1224      */
public Bidi() {
    /* both sizes 0: nothing is preallocated, the two-argument
       constructor then permits allocation on demand */
    this(0, 0);
}
1229 
1230     /**
1231      * Allocate a <code>Bidi</code> object with preallocated memory
1232      * for internal structures.
1233      * This method provides a <code>Bidi</code> object like the default constructor
1234      * but it also preallocates memory for internal structures
1235      * according to the sizings supplied by the caller.<p>
1236      * The preallocation can be limited to some of the internal memory
1237      * by setting some values to 0 here. That means that if, e.g.,
1238      * <code>maxRunCount</code> cannot be reasonably predetermined and should not
1239      * be set to <code>maxLength</code> (the only failproof value) to avoid
1240      * wasting  memory, then <code>maxRunCount</code> could be set to 0 here
1241      * and the internal structures that are associated with it will be allocated
1242      * on demand, just like with the default constructor.
1243      *
1244      * @param maxLength is the maximum text or line length that internal memory
1245      *        will be preallocated for. An attempt to associate this object with a
1246      *        longer text will fail, unless this value is 0, which leaves the allocation
1247      *        up to the implementation.
1248      *
1249      * @param maxRunCount is the maximum anticipated number of same-level runs
1250      *        that internal memory will be preallocated for. An attempt to access
1251      *        visual runs on an object that was not preallocated for as many runs
1252      *        as the text was actually resolved to will fail,
1253      *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
1254      *        The number of runs depends on the actual text and may be anywhere between
1255      *        1 and <code>maxLength</code>. It is typically small.
1256      *
1257      * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0
1258      */
public Bidi(int maxLength, int maxRunCount) {
    /* negative sizes are rejected before any state is set up */
    if (maxLength < 0 || maxRunCount < 0) {
        throw new IllegalArgumentException();
    }

    /* No explicit reset is needed: fields without an initializer start
       out as null / false / 0 when the instance is created. */

    /* get Bidi properties */
    bdp = UBiDiProps.INSTANCE;

    /* per-character arrays: preallocate for a given size,
       otherwise allow them to be allocated later */
    if (maxLength == 0) {
        mayAllocateText = true;
    } else {
        getInitialDirPropsMemory(maxLength);
        getInitialLevelsMemory(maxLength);
    }

    /* runs array: same scheme, except that a single run needs no
       allocation because simpleRuns[] is used for it */
    if (maxRunCount == 0) {
        mayAllocateRuns = true;
    } else if (maxRunCount > 1) {
        getInitialRunsMemory(maxRunCount);
    }
}
1303 
1304     /*
1305      * We are allowed to allocate memory if object==null or
1306      * mayAllocate==true for each array that we need.
1307      *
1308      * Assume sizeNeeded>0.
1309      * If object != null, then assume size > 0.
1310      */
getMemory(String label, Object array, Class<?> arrayClass, boolean mayAllocate, int sizeNeeded)1311     private Object getMemory(String label, Object array, Class<?> arrayClass,
1312             boolean mayAllocate, int sizeNeeded)
1313     {
1314         int len = Array.getLength(array);
1315 
1316         /* we have at least enough memory and must not allocate */
1317         if (sizeNeeded == len) {
1318             return array;
1319         }
1320         if (!mayAllocate) {
1321             /* we must not allocate */
1322             if (sizeNeeded <= len) {
1323                 return array;
1324             }
1325             throw new OutOfMemoryError("Failed to allocate memory for "
1326                                        + label);
1327         }
1328         /* we may try to grow or shrink */
1329         /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid
1330            the allocation altogether and rely on this.length */
1331         try {
1332             return Array.newInstance(arrayClass, sizeNeeded);
1333         } catch (Exception e) {
1334             throw new OutOfMemoryError("Failed to allocate memory for "
1335                                        + label);
1336         }
1337     }
1338 
1339     /* helper methods for each allocated array */
getDirPropsMemory(boolean mayAllocate, int len)1340     private void getDirPropsMemory(boolean mayAllocate, int len)
1341     {
1342         Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len);
1343         dirPropsMemory = (byte[]) array;
1344     }
1345 
getDirPropsMemory(int len)1346     void getDirPropsMemory(int len)
1347     {
1348         getDirPropsMemory(mayAllocateText, len);
1349     }
1350 
getLevelsMemory(boolean mayAllocate, int len)1351     private void getLevelsMemory(boolean mayAllocate, int len)
1352     {
1353         Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len);
1354         levelsMemory = (byte[]) array;
1355     }
1356 
getLevelsMemory(int len)1357     void getLevelsMemory(int len)
1358     {
1359         getLevelsMemory(mayAllocateText, len);
1360     }
1361 
getRunsMemory(boolean mayAllocate, int len)1362     private void getRunsMemory(boolean mayAllocate, int len)
1363     {
1364         Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len);
1365         runsMemory = (BidiRun[]) array;
1366     }
1367 
getRunsMemory(int len)1368     void getRunsMemory(int len)
1369     {
1370         getRunsMemory(mayAllocateRuns, len);
1371     }
1372 
1373     /* additional methods used by constructor - always allow allocation */
getInitialDirPropsMemory(int len)1374     private void getInitialDirPropsMemory(int len)
1375     {
1376         getDirPropsMemory(true, len);
1377     }
1378 
getInitialLevelsMemory(int len)1379     private void getInitialLevelsMemory(int len)
1380     {
1381         getLevelsMemory(true, len);
1382     }
1383 
getInitialRunsMemory(int len)1384     private void getInitialRunsMemory(int len)
1385     {
1386         getRunsMemory(true, len);
1387     }
1388 
1389     /**
1390      * Modify the operation of the Bidi algorithm such that it
1391      * approximates an "inverse Bidi" algorithm. This method
1392      * must be called before <code>setPara()</code>.
1393      *
1394      * <p>The normal operation of the Bidi algorithm as described
1395      * in the Unicode Technical Report is to take text stored in logical
1396      * (keyboard, typing) order and to determine the reordering of it for visual
1397      * rendering.
1398      * Some legacy systems store text in visual order, and for operations
1399      * with standard, Unicode-based algorithms, the text needs to be transformed
1400      * to logical order. This is effectively the inverse algorithm of the
1401      * described Bidi algorithm. Note that there is no standard algorithm for
1402      * this "inverse Bidi" and that the current implementation provides only an
1403      * approximation of "inverse Bidi".
1404      *
1405      * <p>With <code>isInverse</code> set to <code>true</code>,
1406      * this method changes the behavior of some of the subsequent methods
1407      * in a way that they can be used for the inverse Bidi algorithm.
1408      * Specifically, runs of text with numeric characters will be treated in a
1409      * special way and may need to be surrounded with LRM characters when they are
1410      * written in reordered sequence.
1411      *
1412      * <p>Output runs should be retrieved using <code>getVisualRun()</code>.
1413      * Since the actual input for "inverse Bidi" is visually ordered text and
1414      * <code>getVisualRun()</code> gets the reordered runs, these are actually
1415      * the runs of the logically ordered output.
1416      *
1417      * <p>Calling this method with argument <code>isInverse</code> set to
1418      * <code>true</code> is equivalent to calling <code>setReorderingMode</code>
1419      * with argument <code>reorderingMode</code>
1420      * set to <code>REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
1421      * Calling this method with argument <code>isInverse</code> set to
1422      * <code>false</code> is equivalent to calling <code>setReorderingMode</code>
1423      * with argument <code>reorderingMode</code>
1424      * set to <code>REORDER_DEFAULT</code>.
1425      *
1426      * @param isInverse specifies "forward" or "inverse" Bidi operation.
1427      *
1428      * @see #setPara
1429      * @see #writeReordered
1430      * @see #setReorderingMode
1431      * @see #REORDER_INVERSE_NUMBERS_AS_L
1432      * @see #REORDER_DEFAULT
1433      */
setInverse(boolean isInverse)1434     public void setInverse(boolean isInverse) {
1435         this.isInverse = (isInverse);
1436         this.reorderingMode = isInverse ? REORDER_INVERSE_NUMBERS_AS_L
1437                 : REORDER_DEFAULT;
1438     }
1439 
1440     /**
1441      * Is this <code>Bidi</code> object set to perform the inverse Bidi
1442      * algorithm?
1443      * <p>Note: calling this method after setting the reordering mode with
1444      * <code>setReorderingMode</code> will return <code>true</code> if the
1445      * reordering mode was set to
1446      * <code>REORDER_INVERSE_NUMBERS_AS_L</code>, <code>false</code>
1447      * for all other values.
1448      *
1449      * @return <code>true</code> if the <code>Bidi</code> object is set to
1450      * perform the inverse Bidi algorithm by handling numbers as L.
1451      *
1452      * @see #setInverse
1453      * @see #setReorderingMode
1454      * @see #REORDER_INVERSE_NUMBERS_AS_L
1455      */
isInverse()1456     public boolean isInverse() {
1457         return isInverse;
1458     }
1459 
1460     /**
1461      * Modify the operation of the Bidi algorithm such that it implements some
1462      * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
1463      * algorithm, depending on different values of the "reordering mode".
1464      * This method must be called before <code>setPara()</code>, and stays in
1465      * effect until called again with a different argument.
1466      *
1467      * <p>The normal operation of the Bidi algorithm as described in the Unicode
1468      * Standard Annex #9 is to take text stored in logical (keyboard, typing)
1469      * order and to determine how to reorder it for visual rendering.
1470      *
1471      * <p>With the reordering mode set to a value other than
1472      * <code>REORDER_DEFAULT</code>, this method changes the behavior of some of
1473      * the subsequent methods in a way such that they implement an inverse Bidi
1474      * algorithm or some other algorithm variants.
1475      *
1476      * <p>Some legacy systems store text in visual order, and for operations
1477      * with standard, Unicode-based algorithms, the text needs to be transformed
1478      * into logical order. This is effectively the inverse algorithm of the
1479      * described Bidi algorithm. Note that there is no standard algorithm for
1480      * this "inverse Bidi", so a number of variants are implemented here.
1481      *
1482      * <p>In other cases, it may be desirable to emulate some variant of the
1483      * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
1484      * Logical to Logical transformation.
1485      *
1486      * <ul>
1487      * <li>When the Reordering Mode is set to
1488      * <code>REORDER_DEFAULT</code>,
1489      * the standard Bidi Logical to Visual algorithm is applied.</li>
1490      *
1491      * <li>When the reordering mode is set to
1492      * <code>REORDER_NUMBERS_SPECIAL</code>,
1493      * the algorithm used to perform Bidi transformations when calling
1494      * <code>setPara</code> should approximate the algorithm used in Microsoft
1495      * Windows XP rather than strictly conform to the Unicode Bidi algorithm.
1496      * <br>
1497      * The differences between the basic algorithm and the algorithm addressed
1498      * by this option are as follows:
1499      * <ul>
1500      *   <li>Within text at an even embedding level, the sequence "123AB"
1501      *   (where AB represent R or AL letters) is transformed to "123BA" by the
1502      *   Unicode algorithm and to "BA123" by the Windows algorithm.</li>
1503      *
1504      *   <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
1505      *   like regular numbers (EN).</li>
1506      * </ul></li>
1507      *
1508      * <li>When the reordering mode is set to
1509      * <code>REORDER_GROUP_NUMBERS_WITH_R</code>,
1510      * numbers located between LTR text and RTL text are associated with the RTL
1511      * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
1512      * upper case letters represent RTL characters) will be transformed to
1513      * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
1514      * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
1515      * This makes the algorithm reversible and makes it useful when round trip
1516      * (from visual to logical and back to visual) must be achieved without
1517      * adding LRM characters. However, this is a variation from the standard
1518      * Unicode Bidi algorithm.<br>
1519      * The source text should not contain Bidi control characters other than LRM
1520      * or RLM.</li>
1521      *
1522      * <li>When the reordering mode is set to
1523      * <code>REORDER_RUNS_ONLY</code>,
1524      * a "Logical to Logical" transformation must be performed:
1525      * <ul>
1526      * <li>If the default text level of the source text (argument
1527      * <code>paraLevel</code> in <code>setPara</code>) is even, the source text
1528      * will be handled as LTR logical text and will be transformed to the RTL
1529      * logical text which has the same LTR visual display.</li>
1530      * <li>If the default level of the source text is odd, the source text
1531      * will be handled as RTL logical text and will be transformed to the
1532      * LTR logical text which has the same LTR visual display.</li>
1533      * </ul>
1534      * This mode may be needed when logical text which is basically Arabic or
1535      * Hebrew, with possible included numbers or phrases in English, has to be
1536      * displayed as if it had an even embedding level (this can happen if the
1537      * displaying application treats all text as if it was basically LTR).
1538      * <br>
1539      * This mode may also be needed in the reverse case, when logical text which
1540      * is basically English, with possible included phrases in Arabic or Hebrew,
1541      * has to be displayed as if it had an odd embedding level.
1542      * <br>
1543      * Both cases could be handled by adding LRE or RLE at the head of the
1544      * text, if the display subsystem supports these formatting controls. If it
1545      * does not, the problem may be handled by transforming the source text in
1546      * this mode before displaying it, so that it will be displayed properly.
1547      * <br>
1548      * The source text should not contain Bidi control characters other than LRM
1549      * or RLM.</li>
1550      *
1551      * <li>When the reordering mode is set to
1552      * <code>REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi"
1553      * algorithm is applied.
1554      * Runs of text with numeric characters will be treated like LTR letters and
1555      * may need to be surrounded with LRM characters when they are written in
1556      * reordered sequence (the option <code>INSERT_LRM_FOR_NUMERIC</code> can
1557      * be used with method <code>writeReordered</code> to this end). This mode
1558      * is equivalent to calling <code>setInverse()</code> with
1559      * argument <code>isInverse</code> set to <code>true</code>.</li>
1560      *
1561      * <li>When the reordering mode is set to
1562      * <code>REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to
1563      * Visual Bidi algorithm is used as an approximation of an "inverse Bidi"
1564      * algorithm. This mode is similar to mode
1565      * <code>REORDER_INVERSE_NUMBERS_AS_L</code> but is closer to the
1566      * regular Bidi algorithm.
1567      * <br>
1568      * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
1569      * upper case represents RTL characters) will be transformed to
1570      * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
1571      * with mode <code>REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
1572      * When used in conjunction with option
1573      * <code>OPTION_INSERT_MARKS</code>, this mode generally
1574      * adds Bidi marks to the output significantly more sparingly than mode
1575      * <code>REORDER_INVERSE_NUMBERS_AS_L</code> with option
1576      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to
1577      * <code>writeReordered</code>.</li>
1578      *
1579      * <li>When the reordering mode is set to
1580      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
1581      * Bidi algorithm used in Windows XP is used as an approximation of an "inverse
1582      * Bidi" algorithm.
1583      * <br>
1584      * For example, an LTR paragraph with the content "abc FED123" (where
1585      * upper case represents RTL characters) will be transformed to
1586      * "abc 123DEF".</li>
1587      * </ul>
1588      *
1589      * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
1590      * (i.e. those with a name starting with <code>REORDER_INVERSE</code>),
1591      * output runs should be retrieved using <code>getVisualRun()</code>, and
1592      * the output text with <code>writeReordered()</code>. The caller should
1593      * keep in mind that in "inverse Bidi" modes the input is actually visually
1594      * ordered text and reordered output returned by <code>getVisualRun()</code>
1595      * or <code>writeReordered()</code> are actually runs or character string
1596      * of logically ordered output.<br>
1597      * For all the "inverse Bidi" modes, the source text should not contain
1598      * Bidi control characters other than LRM or RLM.
1599      *
1600      * <p>Note that option <code>OUTPUT_REVERSE</code> of
1601      * <code>writeReordered</code> has no useful meaning and should not be used
1602      * in conjunction with any value of the reordering mode specifying "inverse
1603      * Bidi" or with value <code>REORDER_RUNS_ONLY</code>.
1604      *
1605      * @param reorderingMode specifies the required variant of the Bidi
1606      *                       algorithm.
1607      *
1608      * @see #setInverse
1609      * @see #setPara
1610      * @see #writeReordered
1611      * @see #INSERT_LRM_FOR_NUMERIC
1612      * @see #OUTPUT_REVERSE
1613      * @see #REORDER_DEFAULT
1614      * @see #REORDER_NUMBERS_SPECIAL
1615      * @see #REORDER_GROUP_NUMBERS_WITH_R
1616      * @see #REORDER_RUNS_ONLY
1617      * @see #REORDER_INVERSE_NUMBERS_AS_L
1618      * @see #REORDER_INVERSE_LIKE_DIRECT
1619      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
1620      */
setReorderingMode(int reorderingMode)1621     public void setReorderingMode(int reorderingMode) {
1622         if ((reorderingMode < REORDER_DEFAULT) ||
1623             (reorderingMode >= REORDER_COUNT))
1624             return;                     /* don't accept a wrong value */
1625         this.reorderingMode = reorderingMode;
1626         this.isInverse =
1627             reorderingMode == REORDER_INVERSE_NUMBERS_AS_L;
1628     }
1629 
1630     /**
1631      * What is the requested reordering mode for a given Bidi object?
1632      *
1633      * @return the current reordering mode of the Bidi object
1634      *
1635      * @see #setReorderingMode
1636      */
getReorderingMode()1637     public int getReorderingMode() {
1638         return this.reorderingMode;
1639     }
1640 
1641     /**
1642      * Specify which of the reordering options should be applied during Bidi
1643      * transformations.
1644      *
1645      * @param options A combination of zero or more of the following
1646      * reordering options:
1647      * <code>OPTION_DEFAULT</code>, <code>OPTION_INSERT_MARKS</code>,
1648      * <code>OPTION_REMOVE_CONTROLS</code>, <code>OPTION_STREAMING</code>.
1649      *
1650      * @see #getReorderingOptions
1651      * @see #OPTION_DEFAULT
1652      * @see #OPTION_INSERT_MARKS
1653      * @see #OPTION_REMOVE_CONTROLS
1654      * @see #OPTION_STREAMING
1655      */
setReorderingOptions(int options)1656     public void setReorderingOptions(int options) {
1657         if ((options & OPTION_REMOVE_CONTROLS) != 0) {
1658             this.reorderingOptions = options & ~OPTION_INSERT_MARKS;
1659         } else {
1660             this.reorderingOptions = options;
1661         }
1662     }
1663 
1664     /**
1665      * What are the reordering options applied to a given Bidi object?
1666      *
1667      * @return the current reordering options of the Bidi object
1668      *
1669      * @see #setReorderingOptions
1670      */
getReorderingOptions()1671     public int getReorderingOptions() {
1672         return this.reorderingOptions;
1673     }
1674 
1675     /**
1676      * Get the base direction of the text provided according to the Unicode
1677      * Bidirectional Algorithm. The base direction is derived from the first
1678      * character in the string with bidirectional character type L, R, or AL.
1679      * If the first such character has type L, LTR is returned. If the first
1680      * such character has type R or AL, RTL is returned. If the string does
1681      * not contain any character of these types, then NEUTRAL is returned.
1682      * This is a lightweight function for use when only the base direction is
1683      * needed and no further bidi processing of the text is needed.
1684      * @param paragraph the text whose paragraph level direction is needed.
1685      * @return LTR, RTL, NEUTRAL
1686      * @see #LTR
1687      * @see #RTL
1688      * @see #NEUTRAL
1689      */
getBaseDirection(CharSequence paragraph)1690     public static byte getBaseDirection(CharSequence paragraph) {
1691         if (paragraph == null || paragraph.length() == 0) {
1692             return NEUTRAL;
1693         }
1694 
1695         int length = paragraph.length();
1696         int c;// codepoint
1697         byte direction;
1698 
1699         for (int i = 0; i < length; ) {
1700             // U16_NEXT(paragraph, i, length, c) for C++
1701             c = UCharacter.codePointAt(paragraph, i);
1702             direction = UCharacter.getDirectionality(c);
1703             if (direction == UCharacterDirection.LEFT_TO_RIGHT) {
1704                 return LTR;
1705             } else if (direction == UCharacterDirection.RIGHT_TO_LEFT
1706                 || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
1707                 return RTL;
1708             }
1709 
1710             i = UCharacter.offsetByCodePoints(paragraph, i, 1);// set i to the head index of next codepoint
1711         }
1712         return NEUTRAL;
1713     }
1714 
1715 /* perform (P2)..(P3) ------------------------------------------------------- */
1716 
1717     /**
1718      * Returns the directionality of the first strong character
1719      * after the last B in prologue, if any.
1720      * Requires prologue!=null.
1721      */
firstL_R_AL()1722     private byte firstL_R_AL() {
1723         byte result = ON;
1724         for (int i = 0; i < prologue.length(); ) {
1725             int uchar = prologue.codePointAt(i);
1726             i += Character.charCount(uchar);
1727             byte dirProp = (byte)getCustomizedClass(uchar);
1728             if (result == ON) {
1729                 if (dirProp == L || dirProp == R || dirProp == AL) {
1730                     result = dirProp;
1731                 }
1732             } else {
1733                 if (dirProp == B) {
1734                     result = ON;
1735                 }
1736             }
1737         }
1738         return result;
1739     }
1740 
1741     /*
1742      * Check that there are enough entries in the arrays paras_limit and paras_level
1743      */
checkParaCount()1744     private void checkParaCount() {
1745         int[] saveLimits;
1746         byte[] saveLevels;
1747         int count = paraCount;
1748         if (count <= paras_level.length)
1749             return;
1750         int oldLength = paras_level.length;
1751         saveLimits = paras_limit;
1752         saveLevels = paras_level;
1753         try {
1754             paras_limit = new int[count * 2];
1755             paras_level = new byte[count * 2];
1756         } catch (Exception e) {
1757             throw new OutOfMemoryError("Failed to allocate memory for paras");
1758         }
1759         System.arraycopy(saveLimits, 0, paras_limit, 0, oldLength);
1760         System.arraycopy(saveLevels, 0, paras_level, 0, oldLength);
1761     }
1762 
1763     /*
1764      * Get the directional properties for the text, calculate the flags bit-set, and
1765      * determine the paragraph level if necessary (in paras_level[i]).
1766      * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
1767      * When encountering an FSI, it is initially replaced with an LRI, which is the
1768      * default. Only if a strong R or AL is found within its scope will the LRI be
1769      * replaced by an RLI.
1770      */
1771     static final int NOT_SEEKING_STRONG = 0;        /* 0: not contextual paraLevel, not after FSI */
1772     static final int SEEKING_STRONG_FOR_PARA = 1;   /* 1: looking for first strong char in para */
1773     static final int SEEKING_STRONG_FOR_FSI = 2;    /* 2: looking for first strong after FSI */
1774     static final int LOOKING_FOR_PDI = 3;           /* 3: found strong after FSI, looking for PDI */
1775 
getDirProps()1776     private void getDirProps()
1777     {
1778         int i = 0, i0, i1;
1779         flags = 0;          /* collect all directionalities in the text */
1780         int uchar;
1781         byte dirProp;
1782         byte defaultParaLevel = 0;   /* initialize to avoid compiler warnings */
1783         boolean isDefaultLevel = IsDefaultLevel(paraLevel);
1784         /* for inverse Bidi, the default para level is set to RTL if there is a
1785            strong R or AL character at either end of the text                */
1786         boolean isDefaultLevelInverse=isDefaultLevel &&
1787                 (reorderingMode == REORDER_INVERSE_LIKE_DIRECT ||
1788                  reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
1789         lastArabicPos = -1;
1790         int controlCount = 0;
1791         boolean removeBidiControls = (reorderingOptions & OPTION_REMOVE_CONTROLS) != 0;
1792 
1793         byte state;
1794         byte lastStrong = ON;           /* for default level & inverse Bidi */
1795     /* The following stacks are used to manage isolate sequences. Those
1796        sequences may be nested, but obviously never more deeply than the
1797        maximum explicit embedding level.
1798        lastStack is the index of the last used entry in the stack. A value of -1
1799        means that there is no open isolate sequence.
1800        lastStack is reset to -1 on paragraph boundaries. */
1801     /* The following stack contains the position of the initiator of
1802        each open isolate sequence */
1803         int[] isolateStartStack= new int[MAX_EXPLICIT_LEVEL+1];
1804     /* The following stack contains the last known state before
1805        encountering the initiator of an isolate sequence */
1806         byte[] previousStateStack = new byte[MAX_EXPLICIT_LEVEL+1];
1807         int  stackLast=-1;
1808 
1809         if ((reorderingOptions & OPTION_STREAMING) != 0)
1810             length = 0;
1811         defaultParaLevel = (byte)(paraLevel & 1);
1812 
1813         if (isDefaultLevel) {
1814             paras_level[0] = defaultParaLevel;
1815             lastStrong = defaultParaLevel;
1816             if (prologue != null &&                        /* there is a prologue */
1817                 (dirProp = firstL_R_AL()) != ON) {     /* with a strong character */
1818                 if (dirProp == L)
1819                     paras_level[0] = 0;             /* set the default para level */
1820                 else
1821                     paras_level[0] = 1;             /* set the default para level */
1822                 state = NOT_SEEKING_STRONG;
1823             } else {
1824                 state = SEEKING_STRONG_FOR_PARA;
1825             }
1826         } else {
1827             paras_level[0] = paraLevel;
1828             state = NOT_SEEKING_STRONG;
1829         }
1830         /* count paragraphs and determine the paragraph level (P2..P3) */
1831         /*
1832          * see comment on constant fields:
1833          * the LEVEL_DEFAULT_XXX values are designed so that
1834          * their low-order bit alone yields the intended default
1835          */
1836 
1837         for (i = 0; i < originalLength; /* i is incremented in the loop */) {
1838             i0 = i;                     /* index of first code unit */
1839             uchar = UTF16.charAt(text, 0, originalLength, i);
1840             i += UTF16.getCharCount(uchar);
1841             i1 = i - 1; /* index of last code unit, gets the directional property */
1842 
1843             dirProp = (byte)getCustomizedClass(uchar);
1844             flags |= DirPropFlag(dirProp);
1845             dirProps[i1] = dirProp;
1846             if (i1 > i0) {     /* set previous code units' properties to BN */
1847                 flags |= DirPropFlag(BN);
1848                 do {
1849                     dirProps[--i1] = BN;
1850                 } while (i1 > i0);
1851             }
1852             if (removeBidiControls && IsBidiControlChar(uchar)) {
1853                 controlCount++;
1854             }
1855             if (dirProp == L) {
1856                 if (state == SEEKING_STRONG_FOR_PARA) {
1857                     paras_level[paraCount - 1] = 0;
1858                     state = NOT_SEEKING_STRONG;
1859                 }
1860                 else if (state == SEEKING_STRONG_FOR_FSI) {
1861                     if (stackLast <= MAX_EXPLICIT_LEVEL) {
1862                         /* no need for next statement, already set by default */
1863                         /* dirProps[isolateStartStack[stackLast]] = LRI; */
1864                         flags |= DirPropFlag(LRI);
1865                     }
1866                     state = LOOKING_FOR_PDI;
1867                 }
1868                 lastStrong = L;
1869                 continue;
1870             }
1871             if (dirProp == R || dirProp == AL) {
1872                 if (state == SEEKING_STRONG_FOR_PARA) {
1873                     paras_level[paraCount - 1] = 1;
1874                     state = NOT_SEEKING_STRONG;
1875                 }
1876                 else if (state == SEEKING_STRONG_FOR_FSI) {
1877                     if (stackLast <= MAX_EXPLICIT_LEVEL) {
1878                         dirProps[isolateStartStack[stackLast]] = RLI;
1879                         flags |= DirPropFlag(RLI);
1880                     }
1881                     state = LOOKING_FOR_PDI;
1882                 }
1883                 lastStrong = R;
1884                 if (dirProp == AL)
1885                     lastArabicPos = i - 1;
1886                 continue;
1887             }
1888             if (dirProp >= FSI && dirProp <= RLI) { /* FSI, LRI or RLI */
1889                 stackLast++;
1890                 if (stackLast <= MAX_EXPLICIT_LEVEL) {
1891                     isolateStartStack[stackLast] = i - 1;
1892                     previousStateStack[stackLast] = state;
1893                 }
1894                 if (dirProp == FSI) {
1895                     dirProps[i-1] = LRI;    /* default if no strong char */
1896                     state = SEEKING_STRONG_FOR_FSI;
1897                 }
1898                 else
1899                     state = LOOKING_FOR_PDI;
1900                 continue;
1901             }
1902             if (dirProp == PDI) {
1903                 if (state == SEEKING_STRONG_FOR_FSI) {
1904                     if (stackLast <= MAX_EXPLICIT_LEVEL) {
1905                         /* no need for next statement, already set by default */
1906                         /* dirProps[isolateStartStack[stackLast]] = LRI; */
1907                         flags |= DirPropFlag(LRI);
1908                     }
1909                 }
1910                 if (stackLast >= 0) {
1911                     if (stackLast <= MAX_EXPLICIT_LEVEL)
1912                         state = previousStateStack[stackLast];
1913                     stackLast--;
1914                 }
1915                 continue;
1916             }
1917             if (dirProp == B) {
1918                 if (i < originalLength && uchar == CR && text[i] == LF) /* do nothing on the CR */
1919                     continue;
1920                 paras_limit[paraCount - 1] = i;
1921                 if (isDefaultLevelInverse && lastStrong == R)
1922                     paras_level[paraCount - 1] = 1;
1923                 if ((reorderingOptions & OPTION_STREAMING) != 0) {
1924                 /* When streaming, we only process whole paragraphs
1925                    thus some updates are only done on paragraph boundaries */
1926                    length = i;          /* i is index to next character */
1927                    this.controlCount = controlCount;
1928                 }
1929                 if (i < originalLength) {       /* B not last char in text */
1930                     paraCount++;
1931                     checkParaCount();   /* check that there is enough memory for a new para entry */
1932                     if (isDefaultLevel) {
1933                         paras_level[paraCount - 1] = defaultParaLevel;
1934                         state = SEEKING_STRONG_FOR_PARA;
1935                         lastStrong = defaultParaLevel;
1936                     } else {
1937                         paras_level[paraCount - 1] = paraLevel;
1938                         state = NOT_SEEKING_STRONG;
1939                     }
1940                     stackLast = -1;
1941                 }
1942                 continue;
1943             }
1944         }
1945         /* +Ignore still open isolate sequences with overflow */
1946         if (stackLast > MAX_EXPLICIT_LEVEL) {
1947             stackLast = MAX_EXPLICIT_LEVEL;
1948             state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
1949         }
1950         /* Resolve direction of still unresolved open FSI sequences */
1951         while (stackLast >= 0) {
1952             if (state == SEEKING_STRONG_FOR_FSI) {
1953                 /* no need for next statement, already set by default */
1954                 /* dirProps[isolateStartStack[stackLast]] = LRI; */
1955                 flags |= DirPropFlag(LRI);
1956                 break;
1957             }
1958             state = previousStateStack[stackLast];
1959             stackLast--;
1960         }
1961         /* When streaming, ignore text after the last paragraph separator */
1962         if ((reorderingOptions & OPTION_STREAMING) != 0) {
1963             if (length < originalLength)
1964                 paraCount--;
1965         } else {
1966             paras_limit[paraCount - 1] = originalLength;
1967             this.controlCount = controlCount;
1968         }
1969         /* For inverse bidi, default para direction is RTL if there is
1970            a strong R or AL at either end of the paragraph */
1971         if (isDefaultLevelInverse && lastStrong == R) {
1972             paras_level[paraCount - 1] = 1;
1973         }
1974         if (isDefaultLevel) {
1975             paraLevel = paras_level[0];
1976         }
1977         /* The following is needed to resolve the text direction for default level
1978            paragraphs containing no strong character */
1979         for (i = 0; i < paraCount; i++)
1980             flags |= DirPropFlagLR(paras_level[i]);
1981 
1982         if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
1983             flags |= DirPropFlag(L);
1984         }
1985     }
1986 
1987     /* determine the paragraph level at position index */
GetParaLevelAt(int pindex)1988     byte GetParaLevelAt(int pindex)
1989     {
1990         if (defaultParaLevel == 0 || pindex < paras_limit[0])
1991             return paraLevel;
1992         int i;
1993         for (i = 1; i < paraCount; i++)
1994             if (pindex < paras_limit[i])
1995                 break;
1996         if (i >= paraCount)
1997             i = paraCount - 1;
1998         return paras_level[i];
1999     }
2000 
2001     /* Functions for handling paired brackets ----------------------------------- */
2002 
2003     /* In the isoRuns array, the first entry is used for text outside of any
2004        isolate sequence.  Higher entries are used for each more deeply nested
2005        isolate sequence. isoRunLast is the index of the last used entry.  The
2006        openings array is used to note the data of opening brackets not yet
2007        matched by a closing bracket, or matched but still susceptible to change
2008        level.
2009        Each isoRun entry contains the index of the first and
2010        one-after-last openings entries for pending opening brackets it
2011        contains.  The next openings entry to use is the one-after-last of the
2012        most deeply nested isoRun entry.
2013        isoRun entries also contain their current embedding level and the last
2014        encountered strong character, since these will be needed to resolve
2015        the level of paired brackets.  */
2016 
bracketInit(BracketData bd)2017     private void bracketInit(BracketData bd) {
2018         bd.isoRunLast = 0;
2019         bd.isoRuns[0] = new IsoRun();
2020         bd.isoRuns[0].start = 0;
2021         bd.isoRuns[0].limit = 0;
2022         bd.isoRuns[0].level = GetParaLevelAt(0);
2023         bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(GetParaLevelAt(0) & 1);
2024         bd.isoRuns[0].contextPos = 0;
2025         bd.openings = new Opening[SIMPLE_OPENINGS_COUNT];
2026         bd.isNumbersSpecial = reorderingMode == REORDER_NUMBERS_SPECIAL ||
2027                               reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
2028     }
2029 
2030     /* paragraph boundary */
bracketProcessB(BracketData bd, byte level)2031     private void bracketProcessB(BracketData bd, byte level) {
2032         bd.isoRunLast = 0;
2033         bd.isoRuns[0].limit = 0;
2034         bd.isoRuns[0].level = level;
2035         bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(level & 1);
2036         bd.isoRuns[0].contextPos = 0;
2037     }
2038 
2039     /* LRE, LRO, RLE, RLO, PDF */
bracketProcessBoundary(BracketData bd, int lastCcPos, byte contextLevel, byte embeddingLevel)2040     private void bracketProcessBoundary(BracketData bd, int lastCcPos,
2041                                         byte contextLevel, byte embeddingLevel) {
2042         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2043         if ((DirPropFlag(dirProps[lastCcPos]) & MASK_ISO) != 0) /* after an isolate */
2044             return;
2045         if (NoOverride(embeddingLevel) > NoOverride(contextLevel))  /* not a PDF */
2046             contextLevel = embeddingLevel;
2047         pLastIsoRun.limit = pLastIsoRun.start;
2048         pLastIsoRun.level = embeddingLevel;
2049         pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(contextLevel & 1);
2050         pLastIsoRun.contextPos = lastCcPos;
2051     }
2052 
2053     /* LRI or RLI */
bracketProcessLRI_RLI(BracketData bd, byte level)2054     private void bracketProcessLRI_RLI(BracketData bd, byte level) {
2055         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2056         short lastLimit;
2057         pLastIsoRun.lastBase = ON;
2058         lastLimit = pLastIsoRun.limit;
2059         bd.isoRunLast++;
2060         pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2061         if (pLastIsoRun == null)
2062             pLastIsoRun = bd.isoRuns[bd.isoRunLast] = new IsoRun();
2063         pLastIsoRun.start = pLastIsoRun.limit = lastLimit;
2064         pLastIsoRun.level = level;
2065         pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(level & 1);
2066         pLastIsoRun.contextPos = 0;
2067     }
2068 
2069     /* PDI */
bracketProcessPDI(BracketData bd)2070     private void bracketProcessPDI(BracketData bd) {
2071         IsoRun pLastIsoRun;
2072         bd.isoRunLast--;
2073         pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2074         pLastIsoRun.lastBase = ON;
2075     }
2076 
2077     /* newly found opening bracket: create an openings entry */
bracketAddOpening(BracketData bd, char match, int position)2078     private void bracketAddOpening(BracketData bd, char match, int position) {
2079         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2080         Opening pOpening;
2081         if (pLastIsoRun.limit >= bd.openings.length) {  /* no available new entry */
2082             Opening[] saveOpenings = bd.openings;
2083             int count;
2084             try {
2085                 count = bd.openings.length;
2086                 bd.openings = new Opening[count * 2];
2087             } catch (Exception e) {
2088                 throw new OutOfMemoryError("Failed to allocate memory for openings");
2089             }
2090             System.arraycopy(saveOpenings, 0, bd.openings, 0, count);
2091         }
2092         pOpening = bd.openings[pLastIsoRun.limit];
2093         if (pOpening == null)
2094             pOpening = bd.openings[pLastIsoRun.limit]= new Opening();
2095         pOpening.position = position;
2096         pOpening.match = match;
2097         pOpening.contextDir = pLastIsoRun.contextDir;
2098         pOpening.contextPos = pLastIsoRun.contextPos;
2099         pOpening.flags = 0;
2100         pLastIsoRun.limit++;
2101     }
2102 
2103     /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
fixN0c(BracketData bd, int openingIndex, int newPropPosition, byte newProp)2104     private void fixN0c(BracketData bd, int openingIndex, int newPropPosition, byte newProp) {
2105         /* This function calls itself recursively */
2106         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2107         Opening qOpening;
2108         int k, openingPosition, closingPosition;
2109         for (k = openingIndex+1; k < pLastIsoRun.limit; k++) {
2110             qOpening = bd.openings[k];
2111             if (qOpening.match >= 0)    /* not an N0c match */
2112                 continue;
2113             if (newPropPosition < qOpening.contextPos)
2114                 break;
2115             if (newPropPosition >= qOpening.position)
2116                 continue;
2117             if (newProp == qOpening.contextDir)
2118                 break;
2119             openingPosition = qOpening.position;
2120             dirProps[openingPosition] = newProp;
2121             closingPosition = -(qOpening.match);
2122             dirProps[closingPosition] = newProp;
2123             qOpening.match = 0;                                 /* prevent further changes */
2124             fixN0c(bd, k, openingPosition, newProp);
2125             fixN0c(bd, k, closingPosition, newProp);
2126         }
2127     }
2128 
2129     /* process closing bracket; return L or R if N0b or N0c, ON if N0d */
bracketProcessClosing(BracketData bd, int openIdx, int position)2130     private byte bracketProcessClosing(BracketData bd, int openIdx, int position) {
2131         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2132         Opening pOpening, qOpening;
2133         byte direction;
2134         boolean stable;
2135         byte newProp;
2136         pOpening = bd.openings[openIdx];
2137         direction = (byte)(pLastIsoRun.level & 1);
2138         stable = true;          /* assume stable until proved otherwise */
2139 
2140         /* The stable flag is set when brackets are paired and their
2141            level is resolved and cannot be changed by what will be
2142            found later in the source string.
2143            An unstable match can occur only when applying N0c, where
2144            the resolved level depends on the preceding context, and
2145            this context may be affected by text occurring later.
2146            Example: RTL paragraph containing:  abc[(latin) HEBREW]
2147            When the closing parenthesis is encountered, it appears
2148            that N0c1 must be applied since 'abc' sets an opposite
2149            direction context and both parentheses receive level 2.
2150            However, when the closing square bracket is processed,
2151            N0b applies because of 'HEBREW' being included within the
2152            brackets, thus the square brackets are treated like R and
2153            receive level 1. However, this changes the preceding
2154            context of the opening parenthesis, and it now appears
2155            that N0c2 must be applied to the parentheses rather than
2156            N0c1. */
2157 
2158             if ((direction == 0 && (pOpening.flags & FOUND_L) > 0) ||
2159                 (direction == 1 && (pOpening.flags & FOUND_R) > 0)) {   /* N0b */
2160                 newProp = direction;
2161             }
2162             else if ((pOpening.flags & (FOUND_L | FOUND_R)) != 0) {     /* N0c */
2163                     /* it is stable if there is no preceding text or in
2164                        conditions too complicated and not worth checking */
2165                     stable = (openIdx == pLastIsoRun.start);
2166                 if (direction != pOpening.contextDir)
2167                     newProp = pOpening.contextDir;                      /* N0c1 */
2168                 else
2169                     newProp = direction;                                /* N0c2 */
2170             } else {
2171             /* forget this and any brackets nested within this pair */
2172             pLastIsoRun.limit = (short)openIdx;
2173             return ON;                                                  /* N0d */
2174         }
2175         dirProps[pOpening.position] = newProp;
2176         dirProps[position] = newProp;
2177         /* Update nested N0c pairs that may be affected */
2178         fixN0c(bd, openIdx, pOpening.position, newProp);
2179         if (stable) {
2180             pLastIsoRun.limit = (short)openIdx; /* forget any brackets nested within this pair */
2181             /* remove lower located synonyms if any */
2182             while (pLastIsoRun.limit > pLastIsoRun.start &&
2183                    bd.openings[pLastIsoRun.limit - 1].position == pOpening.position)
2184                 pLastIsoRun.limit--;
2185         } else {
2186             int k;
2187             pOpening.match = -position;
2188             /* neutralize lower located synonyms if any */
2189             k = openIdx - 1;
2190             while (k >= pLastIsoRun.start &&
2191                    bd.openings[k].position == pOpening.position)
2192                 bd.openings[k--].match = 0;
2193             /* neutralize any unmatched opening between the current pair;
2194                this will also neutralize higher located synonyms if any */
2195             for (k = openIdx + 1; k < pLastIsoRun.limit; k++) {
2196                 qOpening =bd.openings[k];
2197                 if (qOpening.position >= position)
2198                     break;
2199                 if (qOpening.match > 0)
2200                     qOpening.match = 0;
2201             }
2202         }
2203         return newProp;
2204     }
2205 
2206     /* handle strong characters, digits and candidates for closing brackets */
bracketProcessChar(BracketData bd, int position)2207     private void bracketProcessChar(BracketData bd, int position) {
2208         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2209         byte dirProp, newProp;
2210         byte level;
2211         dirProp = dirProps[position];
2212         if (dirProp == ON) {
2213             char c, match;
2214             int idx;
2215             /* First see if it is a matching closing bracket. Hopefully, this is
2216                more efficient than checking if it is a closing bracket at all */
2217             c = text[position];
2218             for (idx = pLastIsoRun.limit - 1; idx >= pLastIsoRun.start; idx--) {
2219                 if (bd.openings[idx].match != c)
2220                     continue;
2221                 /* We have a match */
2222                 newProp = bracketProcessClosing(bd, idx, position);
2223                 if(newProp == ON) {         /* N0d */
2224                     c = 0;          /* prevent handling as an opening */
2225                     break;
2226                 }
2227                 pLastIsoRun.lastBase = ON;
2228                 pLastIsoRun.contextDir = newProp;
2229                 pLastIsoRun.contextPos = position;
2230                 level = levels[position];
2231                 if ((level & LEVEL_OVERRIDE) != 0) {    /* X4, X5 */
2232                     short flag;
2233                     int i;
2234                     newProp = (byte)(level & 1);
2235                     pLastIsoRun.lastStrong = newProp;
2236                     flag = (short)DirPropFlag(newProp);
2237                     for (i = pLastIsoRun.start; i < idx; i++)
2238                         bd.openings[i].flags |= flag;
2239                     /* matching brackets are not overridden by LRO/RLO */
2240                     levels[position] &= ~LEVEL_OVERRIDE;
2241                 }
2242                 /* matching brackets are not overridden by LRO/RLO */
2243                 levels[bd.openings[idx].position] &= ~LEVEL_OVERRIDE;
2244                 return;
2245             }
2246             /* We get here only if the ON character is not a matching closing
2247                bracket or it is a case of N0d */
2248             /* Now see if it is an opening bracket */
2249             if (c != 0)
2250                 match = (char)UCharacter.getBidiPairedBracket(c); /* get the matching char */
2251             else
2252                 match = 0;
2253             if (match != c &&               /* has a matching char */
2254                 UCharacter.getIntPropertyValue(c, UProperty.BIDI_PAIRED_BRACKET_TYPE) ==
2255                     /* opening bracket */         UCharacter.BidiPairedBracketType.OPEN) {
2256                 /* special case: process synonyms
2257                    create an opening entry for each synonym */
2258                 if (match == 0x232A) {      /* RIGHT-POINTING ANGLE BRACKET */
2259                     bracketAddOpening(bd, (char)0x3009, position);
2260                 }
2261                 else if (match == 0x3009) { /* RIGHT ANGLE BRACKET */
2262                     bracketAddOpening(bd, (char)0x232A, position);
2263                 }
2264                 bracketAddOpening(bd, match, position);
2265             }
2266         }
2267         level = levels[position];
2268         if ((level & LEVEL_OVERRIDE) != 0) {    /* X4, X5 */
2269             newProp = (byte)(level & 1);
2270             if (dirProp != S && dirProp != WS && dirProp != ON)
2271                 dirProps[position] = newProp;
2272             pLastIsoRun.lastBase = newProp;
2273             pLastIsoRun.lastStrong = newProp;
2274             pLastIsoRun.contextDir = newProp;
2275             pLastIsoRun.contextPos = position;
2276         }
2277         else if (dirProp <= R || dirProp == AL) {
2278             newProp = DirFromStrong(dirProp);
2279             pLastIsoRun.lastBase = dirProp;
2280             pLastIsoRun.lastStrong = dirProp;
2281             pLastIsoRun.contextDir = newProp;
2282             pLastIsoRun.contextPos = position;
2283         }
2284         else if(dirProp == EN) {
2285             pLastIsoRun.lastBase = EN;
2286             if (pLastIsoRun.lastStrong == L) {
2287                 newProp = L;                    /* W7 */
2288                 if (!bd.isNumbersSpecial)
2289                     dirProps[position] = ENL;
2290                 pLastIsoRun.contextDir = L;
2291                 pLastIsoRun.contextPos = position;
2292             }
2293             else {
2294                 newProp = R;                    /* N0 */
2295                 if (pLastIsoRun.lastStrong == AL)
2296                     dirProps[position] = AN;    /* W2 */
2297                 else
2298                     dirProps[position] = ENR;
2299                 pLastIsoRun.contextDir = R;
2300                 pLastIsoRun.contextPos = position;
2301             }
2302         }
2303         else if (dirProp == AN) {
2304             newProp = R;                        /* N0 */
2305             pLastIsoRun.lastBase = AN;
2306             pLastIsoRun.contextDir = R;
2307             pLastIsoRun.contextPos = position;
2308         }
2309         else if (dirProp == NSM) {
2310             /* if the last real char was ON, change NSM to ON so that it
2311                will stay ON even if the last real char is a bracket which
2312                may be changed to L or R */
2313             newProp = pLastIsoRun.lastBase;
2314             if (newProp == ON)
2315                 dirProps[position] = newProp;
2316         }
2317         else {
2318             newProp = dirProp;
2319             pLastIsoRun.lastBase = dirProp;
2320         }
2321         if (newProp <= R || newProp == AL) {
2322             int i;
2323             short flag = (short)DirPropFlag(DirFromStrong(newProp));
2324             for (i = pLastIsoRun.start; i < pLastIsoRun.limit; i++)
2325                 if (position > bd.openings[i].position)
2326                     bd.openings[i].flags |= flag;
2327         }
2328     }
2329 
2330     /* perform (X1)..(X9) ------------------------------------------------------- */
2331 
2332     /* determine if the text is mixed-directional or single-directional */
directionFromFlags()2333     private byte directionFromFlags() {
2334         /* if the text contains AN and neutrals, then some neutrals may become RTL */
2335         if (!((flags & MASK_RTL) != 0 ||
2336               ((flags & DirPropFlag(AN)) != 0 &&
2337                (flags & MASK_POSSIBLE_N) != 0))) {
2338             return LTR;
2339         } else if ((flags & MASK_LTR) == 0) {
2340             return RTL;
2341         } else {
2342             return MIXED;
2343         }
2344     }
2345 
2346     /*
2347  * Resolve the explicit levels as specified by explicit embedding codes.
2348  * Recalculate the flags to have them reflect the real properties
2349  * after taking the explicit embeddings into account.
2350  *
2351  * The BiDi algorithm is designed to result in the same behavior whether embedding
2352  * levels are externally specified (from "styled text", supposedly the preferred
2353  * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
2354  * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
2355  * However, in a real implementation, the removal of these codes and their index
2356  * positions in the plain text is undesirable since it would result in
2357  * reallocated, reindexed text.
2358  * Instead, this implementation leaves the codes in there and just ignores them
2359  * in the subsequent processing.
2360  * In order to get the same reordering behavior, positions with a BN or a not-isolate
2361  * explicit embedding code just get the same level assigned as the last "real"
2362  * character.
2363  *
2364  * Some implementations, not this one, then overwrite some of these
2365  * directionality properties at "real" same-level-run boundaries by
2366  * L or R codes so that the resolution of weak types can be performed on the
2367  * entire paragraph at once instead of having to parse it once more and
2368  * perform that resolution on same-level-runs.
2369  * This limits the scope of the implicit rules in effectively
2370  * the same way as the run limits.
2371  *
2372  * Instead, this implementation does not modify these codes, except for
2373  * paired brackets whose properties (ON) may be replaced by L or R.
2374  * On one hand, the paragraph has to be scanned for same-level-runs, but
2375  * on the other hand, this saves another loop to reset these codes,
2376  * or saves making and modifying a copy of dirProps[].
2377  *
2378  *
2379  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
2380  *
2381  *
2382  * Handling the stack of explicit levels (Xn):
2383  *
2384  * With the BiDi stack of explicit levels, as pushed with each
2385  * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
2386  * the explicit level must never exceed MAX_EXPLICIT_LEVEL.
2387  *
2388  * In order to have a correct push-pop semantics even in the case of overflows,
2389  * overflow counters and a valid isolate counter are used as described in UAX#9
2390  * section 3.3.2 "Explicit Levels and Directions".
2391  *
2392  * This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
2393  *
2394  * Returns the direction
2395  *
2396  */
    private byte resolveExplicitLevels() {
        /* Overview: three early exits (single direction, inverse reordering
           modes, no explicit codes at all) followed by the full (X1)..(X8)
           state machine over dirProps[]. On every path that gets past the
           first exit, levels[] is filled in; on the full path, flags is
           rebuilt from scratch and bracket pairs are resolved on the fly. */
        int i = 0;
        byte dirProp;
        byte level = GetParaLevelAt(0);
        byte dirct;
        isolateCount = 0;       /* raised below to the deepest valid isolate nesting seen */

        /* determine if the text is mixed-directional or single-directional */
        dirct = directionFromFlags();

        /* we may not need to resolve any explicit levels */
        if (dirct != MIXED) {
            /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
            return dirct;
        }
        if (reorderingMode > REORDER_LAST_LOGICAL_TO_VISUAL) {
            /* inverse BiDi: mixed, but all characters are at the same embedding level */
            /* set all levels to the paragraph level */
            int paraIndex, start, limit;
            for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
                /* paragraph paraIndex spans [start, limit) */
                if (paraIndex == 0)
                    start = 0;
                else
                    start = paras_limit[paraIndex - 1];
                limit = paras_limit[paraIndex];
                level = paras_level[paraIndex];
                for (i = start; i < limit; i++)
                    levels[i] =level;
            }
            return dirct;               /* no bracket matching for inverse BiDi */
        }
        if ((flags & (MASK_EXPLICIT | MASK_ISO)) == 0) {
            /* no embeddings, set all levels to the paragraph level */
            /* we still have to perform bracket matching */
            int paraIndex, start, limit;
            BracketData bracketData = new BracketData();
            bracketInit(bracketData);
            for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
                /* paragraph paraIndex spans [start, limit) */
                if (paraIndex == 0)
                    start = 0;
                else
                    start = paras_limit[paraIndex-1];
                limit = paras_limit[paraIndex];
                level = paras_level[paraIndex];
                for (i = start; i < limit; i++) {
                    levels[i] = level;
                    dirProp = dirProps[i];
                    if (dirProp == BN)
                        continue;       /* BN takes no part in bracket matching */
                    if (dirProp == B) {
                        /* a B at the very end of the text needs no reset */
                        if ((i + 1) < length) {
                            if (text[i] == CR && text[i + 1] == LF)
                                continue;   /* skip CR when followed by LF */
                            bracketProcessB(bracketData, level);
                        }
                        continue;
                    }
                    bracketProcessChar(bracketData, i);
                }
            }
            return dirct;
        }
        /* continue to perform (Xn) */

        /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
        /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
        byte embeddingLevel = level, newLevel;
        byte previousLevel = level; /* previous level for regular (not CC) characters */
        int lastCcPos = 0;          /* index of last effective LRx,RLx, PDx */

        /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
           stackLast points to its current entry. */
        short[] stack = new short[MAX_EXPLICIT_LEVEL + 2];  /* we never push anything >= MAX_EXPLICIT_LEVEL
                                                               but we need one more entry as base */
        int stackLast = 0;
        int overflowIsolateCount = 0;       /* isolate initiators ignored because of overflow */
        int overflowEmbeddingCount = 0;     /* embedding/override codes ignored because of overflow */
        int validIsolateCount = 0;          /* isolates currently open and not overflowed */
        BracketData bracketData = new BracketData();
        bracketInit(bracketData);
        stack[0] = level;       /* initialize base entry to para level, no override, no isolate */

        /* recalculate the flags */
        flags = 0;

        for (i = 0; i < length; i++) {
            dirProp = dirProps[i];
            switch (dirProp) {
            case LRE:
            case RLE:
            case LRO:
            case RLO:
                /* (X2, X3, X4, X5) */
                flags |= DirPropFlag(BN);
                /* the code itself keeps the level of the preceding regular character */
                levels[i] = previousLevel;
                if (dirProp == LRE || dirProp == LRO)
                    /* least greater even level */
                    newLevel = (byte)((embeddingLevel+2) & ~(LEVEL_OVERRIDE | 1));
                else
                    /* least greater odd level */
                    newLevel = (byte)((NoOverride(embeddingLevel) + 1) | 1);
                if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0 &&
                                                      overflowEmbeddingCount == 0) {
                    /* valid push */
                    lastCcPos = i;
                    embeddingLevel = newLevel;
                    if (dirProp == LRO || dirProp == RLO)
                        embeddingLevel |= LEVEL_OVERRIDE;
                    stackLast++;
                    stack[stackLast] = embeddingLevel;
                    /* we don't need to set LEVEL_OVERRIDE off for LRE and RLE
                       since this has already been done for newLevel which is
                       the source for embeddingLevel.
                     */
                } else {
                    /* overflow: count it only when no isolate overflow is pending */
                    if (overflowIsolateCount == 0)
                        overflowEmbeddingCount++;
                }
                break;
            case PDF:
                /* (X7) */
                flags |= DirPropFlag(BN);
                levels[i] = previousLevel;
                /* handle all the overflow cases first */
                if (overflowIsolateCount > 0) {
                    break;
                }
                if (overflowEmbeddingCount > 0) {
                    overflowEmbeddingCount--;
                    break;
                }
                if (stackLast > 0 && stack[stackLast] < ISOLATE) {   /* not an isolate entry */
                    lastCcPos = i;
                    stackLast--;
                    embeddingLevel = (byte)stack[stackLast];
                }
                break;
            case LRI:
            case RLI:
                /* the initiator itself belongs to the enclosing level, without override */
                flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
                levels[i] = NoOverride(embeddingLevel);
                if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
                    /* level changed since the last regular character */
                    bracketProcessBoundary(bracketData, lastCcPos,
                                           previousLevel, embeddingLevel);
                    flags |= DirPropFlagMultiRuns;
                }
                previousLevel = embeddingLevel;
                /* (X5a, X5b) */
                if (dirProp == LRI)
                    /* least greater even level */
                    newLevel=(byte)((embeddingLevel+2)&~(LEVEL_OVERRIDE|1));
                else
                    /* least greater odd level */
                    newLevel=(byte)((NoOverride(embeddingLevel)+1)|1);
                if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0
                                                   && overflowEmbeddingCount == 0) {
                    flags |= DirPropFlag(dirProp);
                    lastCcPos = i;
                    validIsolateCount++;
                    if (validIsolateCount > isolateCount)
                        isolateCount = validIsolateCount;
                    embeddingLevel = newLevel;
                    /* we can increment stackLast without checking because newLevel
                       will exceed MAX_EXPLICIT_LEVEL before stackLast overflows */
                    stackLast++;
                    /* adding ISOLATE marks this stack entry as an isolate entry */
                    stack[stackLast] = (short)(embeddingLevel + ISOLATE);
                    bracketProcessLRI_RLI(bracketData, embeddingLevel);
                } else {
                    /* make it WS so that it is handled by adjustWSLevels() */
                    dirProps[i] = WS;
                    overflowIsolateCount++;
                }
                break;
            case PDI:
                if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
                    /* level changed since the last regular character */
                    bracketProcessBoundary(bracketData, lastCcPos,
                                           previousLevel, embeddingLevel);
                    flags |= DirPropFlagMultiRuns;
                }
                /* (X6a) */
                if (overflowIsolateCount > 0) {
                    /* this PDI closes an overflowed isolate */
                    overflowIsolateCount--;
                    /* make it WS so that it is handled by adjustWSLevels() */
                    dirProps[i] = WS;
                }
                else if (validIsolateCount > 0) {
                    flags |= DirPropFlag(PDI);
                    lastCcPos = i;
                    overflowEmbeddingCount = 0;
                    while (stack[stackLast] < ISOLATE)  /* pop embedding entries */
                        stackLast--;                    /* until the last isolate entry */
                    stackLast--;                        /* pop also the last isolate entry */
                    validIsolateCount--;
                    bracketProcessPDI(bracketData);
                } else
                    /* make it WS so that it is handled by adjustWSLevels() */
                    dirProps[i] = WS;
                /* in all three cases, continue at the level of the current stack entry */
                embeddingLevel = (byte)(stack[stackLast] & ~ISOLATE);
                flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
                previousLevel = embeddingLevel;
                levels[i] = NoOverride(embeddingLevel);
                break;
            case B:
                flags |= DirPropFlag(B);
                levels[i] = GetParaLevelAt(i);
                /* a B at the very end of the text needs no reset */
                if ((i + 1) < length) {
                    if (text[i] == CR && text[i + 1] == LF)
                        break;          /* skip CR when followed by LF */
                    /* start the next paragraph with a clean stack and clean counters */
                    overflowEmbeddingCount = overflowIsolateCount = 0;
                    validIsolateCount = 0;
                    stackLast = 0;
                    previousLevel = embeddingLevel = GetParaLevelAt(i + 1);
                    stack[0] = embeddingLevel;   /* initialize base entry to para level, no override, no isolate */
                    bracketProcessB(bracketData, embeddingLevel);
                }
                break;
            case BN:
                /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
                /* they will get their levels set correctly in adjustWSLevels() */
                levels[i] = previousLevel;
                flags |= DirPropFlag(BN);
                break;
            default:
                /* all other types are normal characters and get the "real" level */
                if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
                    /* first regular character after a level change */
                    bracketProcessBoundary(bracketData, lastCcPos,
                                           previousLevel, embeddingLevel);
                    flags |= DirPropFlagMultiRuns;
                    if ((embeddingLevel & LEVEL_OVERRIDE) != 0)
                        flags |= DirPropFlagO(embeddingLevel);
                    else
                        flags |= DirPropFlagE(embeddingLevel);
                }
                previousLevel = embeddingLevel;
                /* the stored level keeps the LEVEL_OVERRIDE bit, if any */
                levels[i] = embeddingLevel;
                bracketProcessChar(bracketData, i);
                /* the dirProp may have been changed in bracketProcessChar() */
                flags |= DirPropFlag(dirProps[i]);
                break;
            }
        }
        /* when embedding flags were recorded, the paragraph level's direction counts too */
        if ((flags & MASK_EMBEDDING) != 0) {
            flags |= DirPropFlagLR(paraLevel);
        }
        if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
            flags |= DirPropFlag(L);
        }
        /* again, determine if the text is mixed-directional or single-directional */
        dirct = directionFromFlags();

        return dirct;
    }
2648 
2649     /**
2650      * Use a pre-specified embedding levels array:
2651      *
2652      * <p>Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
2653      * ignore all explicit codes (X9),
2654      * and check all the preset levels.
2655      *
2656      * <p>Recalculate the flags to have them reflect the real properties
2657      * after taking the explicit embeddings into account.
2658      */
checkExplicitLevels()2659     private byte checkExplicitLevels() {
2660         int isolateCount = 0;
2661 
2662         this.flags = 0;     /* collect all directionalities in the text */
2663         this.isolateCount = 0;
2664 
2665         int currentParaIndex = 0;
2666         int currentParaLimit = paras_limit[0];
2667         byte currentParaLevel = paraLevel;
2668 
2669         for (int i = 0; i < length; ++i) {
2670             byte level = levels[i];
2671             byte dirProp = dirProps[i];
2672             if (dirProp == LRI || dirProp == RLI) {
2673                 isolateCount++;
2674                 if (isolateCount > this.isolateCount)
2675                     this.isolateCount = isolateCount;
2676             }
2677             else if (dirProp == PDI)
2678                 isolateCount--;
2679             else if (dirProp == B)
2680                 isolateCount = 0;
2681 
2682             // optimized version of  byte currentParaLevel = GetParaLevelAt(i);
2683             if (defaultParaLevel != 0 &&
2684                     i == currentParaLimit && (currentParaIndex + 1) < paraCount) {
2685                 currentParaLevel = paras_level[++currentParaIndex];
2686                 currentParaLimit = paras_limit[currentParaIndex];
2687             }
2688 
2689             int overrideFlag = level & LEVEL_OVERRIDE;
2690             level &= ~LEVEL_OVERRIDE;
2691             if (level < currentParaLevel || MAX_EXPLICIT_LEVEL < level) {
2692                 if (level == 0) {
2693                     if (dirProp == B) {
2694                         // Paragraph separators are ok with explicit level 0.
2695                         // Prevents reordering of paragraphs.
2696                     } else {
2697                         // Treat explicit level 0 as a wildcard for the paragraph level.
2698                         // Avoid making the caller guess what the paragraph level would be.
2699                         level = currentParaLevel;
2700                         levels[i] = (byte)(level | overrideFlag);
2701                     }
2702                 } else {
2703                     // 1 <= level < currentParaLevel or MAX_EXPLICIT_LEVEL < level
2704                     throw new IllegalArgumentException("level " + level +
2705                                                        " out of bounds at " + i);
2706                 }
2707             }
2708             if (overrideFlag != 0) {
2709                 /* keep the override flag in levels[i] but adjust the flags */
2710                 flags |= DirPropFlagO(level);
2711             } else {
2712                 /* set the flags */
2713                 flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
2714             }
2715         }
2716         if ((flags & MASK_EMBEDDING) != 0)
2717             flags |= DirPropFlagLR(paraLevel);
2718         /* determine if the text is mixed-directional or single-directional */
2719         return directionFromFlags();
2720     }
2721 
2722     /*********************************************************************/
2723     /* The Properties state machine table                                */
2724     /*********************************************************************/
2725     /*                                                                   */
2726     /* All table cells are 8 bits:                                       */
2727     /*      bits 0..4:  next state                                       */
2728     /*      bits 5..7:  action to perform (if > 0)                       */
2729     /*                                                                   */
2730     /* Cells may be of format "n" where n represents the next state      */
2731     /* (except for the rightmost column).                                */
2732     /* Cells may also be of format "_(x,y)" where x represents an action */
2733     /* to perform and y represents the next state.                       */
2734     /*                                                                   */
2735     /*********************************************************************/
2736     /* Definitions and type for properties state tables                  */
2737     /*********************************************************************/
2738     private static final int IMPTABPROPS_COLUMNS = 16;  /* cells per impTabProps row: 15 regrouped property columns plus the Res column */
2739     private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;  /* index of the rightmost column (Res in the impTabProps header) */
GetStateProps(short cell)2740     private static short GetStateProps(short cell) {
2741         return (short)(cell & 0x1f);
2742     }
GetActionProps(short cell)2743     private static short GetActionProps(short cell) {
2744         return (short)(cell >> 5);
2745     }
2746 
2747     private static final short groupProp[] =          /* dirProp regrouped */
2748     {
         /* One entry per dirProp, in the order given by the header line below.
          * Each value is the impTabProps column that the property is folded into
          * (e.g. WS, ON, FSI, LRI, RLI and PDI all map to column 4, the ON column). */
2749         /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
2750             0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
2751     };
         /* Reduced properties: the values stored in the Res column of impTabProps.
          * They are also the column order (L, R, EN, AN, ON, S, B) of the levels
          * state tables, whose width is derived from _B. */
2752     private static final short _L  = 0;
2753     private static final short _R  = 1;
2754     private static final short _EN = 2;
2755     private static final short _AN = 3;
2756     private static final short _ON = 4;
2757     private static final short _S  = 5;
2758     private static final short _B  = 6; /* reduced dirProp */
2759 
2760     /*********************************************************************/
2761     /*                                                                   */
2762     /*      PROPERTIES  STATE  TABLE                                     */
2763     /*                                                                   */
2764     /* In table impTabProps,                                             */
2765     /*      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI    */
2766     /*      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF         */
2767     /*      - the Res column is the reduced property assigned to a run   */
2768     /*                                                                   */
2769     /* Action 1: process current run1, init new run1                     */
2770     /*        2: init new run2                                           */
2771     /*        3: process run1, process run2, init new run1               */
2772     /*        4: process run1, set run1=run2, init new run2              */
2773     /*                                                                   */
2774     /* Notes:                                                            */
2775     /*  1) This table is used in resolveImplicitLevels().                */
2776     /*  2) This table triggers actions when there is a change in the Bidi*/
2777     /*     property of incoming characters (action 1).                   */
2778     /*  3) Most such property sequences are processed immediately (in    */
2779     /*     fact, passed to processPropertySeq()).                        */
2780     /*  4) However, numbers are assembled as one sequence. This means    */
2781     /*     that undefined situations (like CS following digits, until    */
2782     /*     it is known if the next char will be a digit) are held until  */
2783     /*     following chars define them.                                  */
2784     /*     Example: digits followed by CS, then comes another CS or ON;  */
2785     /*              the digits will be processed, then the CS assigned   */
2786     /*              as the start of an ON sequence (action 3).           */
2787     /*  5) There are cases where more than one sequence must be          */
2788     /*     processed, for instance digits followed by CS followed by L:  */
2789     /*     the digits must be processed as one sequence, and the CS      */
2790     /*     must be processed as an ON sequence, all this before starting */
2791     /*     assembling chars for the opening L sequence.                  */
2792     /*                                                                   */
2793     /*                                                                   */
2794     private static final short impTabProps[][] =
2795     {
         /* One row per state, one column per regrouped property (see groupProp),
          * plus the Res column holding the reduced property assigned to a run.
          * A cell written as 32*a + s means "perform action a, then go to state s":
          * 32+ is action 1, 64+ action 2, 96+ action 3, 128+ action 4. A bare
          * number is a state change without action. Cells are decoded with
          * GetStateProps() (cell & 0x1f) and GetActionProps() (cell >> 5). */
2796 /*                        L,     R,    EN,    AN,    ON,     S,     B,    ES,    ET,    CS,    BN,   NSM,    AL,   ENL,   ENR,   Res */
2797 /* 0 Init        */ {     1,     2,     4,     5,     7,    15,    17,     7,     9,     7,     0,     7,     3,    18,    21,   _ON },
2798 /* 1 L           */ {     1,  32+2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     1,     1,  32+3, 32+18, 32+21,    _L },
2799 /* 2 R           */ {  32+1,     2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     2,     2,  32+3, 32+18, 32+21,    _R },
2800 /* 3 AL          */ {  32+1,  32+2,  32+6,  32+6,  32+8, 32+16, 32+17,  32+8,  32+8,  32+8,     3,     3,     3, 32+18, 32+21,    _R },
2801 /* 4 EN          */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17, 64+10,    11, 64+10,     4,     4,  32+3,    18,    21,   _EN },
2802 /* 5 AN          */ {  32+1,  32+2,  32+4,     5,  32+7, 32+15, 32+17,  32+7,  32+9, 64+12,     5,     5,  32+3, 32+18, 32+21,   _AN },
2803 /* 6 AL:EN/AN    */ {  32+1,  32+2,     6,     6,  32+8, 32+16, 32+17,  32+8,  32+8, 64+13,     6,     6,  32+3,    18,    21,   _AN },
2804 /* 7 ON          */ {  32+1,  32+2,  32+4,  32+5,     7, 32+15, 32+17,     7, 64+14,     7,     7,     7,  32+3, 32+18, 32+21,   _ON },
2805 /* 8 AL:ON       */ {  32+1,  32+2,  32+6,  32+6,     8, 32+16, 32+17,     8,     8,     8,     8,     8,  32+3, 32+18, 32+21,   _ON },
2806 /* 9 ET          */ {  32+1,  32+2,     4,  32+5,     7, 32+15, 32+17,     7,     9,     7,     9,     9,  32+3,    18,    21,   _ON },
2807 /*10 EN+ES/CS    */ {  96+1,  96+2,     4,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    10, 128+7,  96+3,    18,    21,   _EN },
2808 /*11 EN+ET       */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17,  32+7,    11,  32+7,    11,    11,  32+3,    18,    21,   _EN },
2809 /*12 AN+CS       */ {  96+1,  96+2,  96+4,     5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    12, 128+7,  96+3, 96+18, 96+21,   _AN },
2810 /*13 AL:EN/AN+CS */ {  96+1,  96+2,     6,     6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8,    13, 128+8,  96+3,    18,    21,   _AN },
2811 /*14 ON+ET       */ {  32+1,  32+2, 128+4,  32+5,     7, 32+15, 32+17,     7,    14,     7,    14,    14,  32+3,128+18,128+21,   _ON },
2812 /*15 S           */ {  32+1,  32+2,  32+4,  32+5,  32+7,    15, 32+17,  32+7,  32+9,  32+7,    15,  32+7,  32+3, 32+18, 32+21,    _S },
2813 /*16 AL:S        */ {  32+1,  32+2,  32+6,  32+6,  32+8,    16, 32+17,  32+8,  32+8,  32+8,    16,  32+8,  32+3, 32+18, 32+21,    _S },
2814 /*17 B           */ {  32+1,  32+2,  32+4,  32+5,  32+7, 32+15,    17,  32+7,  32+9,  32+7,    17,  32+7,  32+3, 32+18, 32+21,    _B },
2815 /*18 ENL         */ {  32+1,  32+2,    18,  32+5,  32+7, 32+15, 32+17, 64+19,    20, 64+19,    18,    18,  32+3,    18,    21,    _L },
2816 /*19 ENL+ES/CS   */ {  96+1,  96+2,    18,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    19, 128+7,  96+3,    18,    21,    _L },
2817 /*20 ENL+ET      */ {  32+1,  32+2,    18,  32+5,  32+7, 32+15, 32+17,  32+7,    20,  32+7,    20,    20,  32+3,    18,    21,    _L },
2818 /*21 ENR         */ {  32+1,  32+2,    21,  32+5,  32+7, 32+15, 32+17, 64+22,    23, 64+22,    21,    21,  32+3,    18,    21,   _AN },
2819 /*22 ENR+ES/CS   */ {  96+1,  96+2,    21,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    22, 128+7,  96+3,    18,    21,   _AN },
2820 /*23 ENR+ET      */ {  32+1,  32+2,    21,  32+5,  32+7, 32+15, 32+17,  32+7,    23,  32+7,    23,    23,  32+3,    18,    21,   _AN }
2821     };
2822 
2823     /*********************************************************************/
2824     /* The levels state machine tables                                   */
2825     /*********************************************************************/
2826     /*                                                                   */
2827     /* All table cells are 8 bits:                                       */
2828     /*      bits 0..3:  next state                                       */
2829     /*      bits 4..7:  action to perform (if > 0)                       */
2830     /*                                                                   */
2831     /* Cells may be of format "n" where n represents the next state      */
2832     /* (except for the rightmost column).                                */
2833     /* Cells may also be of format "_(x,y)" where x represents an action */
2834     /* to perform and y represents the next state.                       */
2835     /*                                                                   */
2836     /* This format limits each table to 16 states each and to 15 actions.*/
2837     /*                                                                   */
2838     /*********************************************************************/
2839     /* Definitions and type for levels state tables                      */
2840     /*********************************************************************/
2841     private static final int IMPTABLEVELS_COLUMNS = _B + 2;  /* columns _L.._B (7 reduced properties) plus the Res column */
2842     private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1;  /* index of the Res column, read in processPropertySeq() to obtain addLevel */
GetState(byte cell)2843     private static short GetState(byte cell) { return (short)(cell & 0x0f); }
GetAction(byte cell)2844     private static short GetAction(byte cell) { return (short)(cell >> 4); }
2845 
2846     private static class ImpTabPair {
2847         byte[][][] imptab;
2848         short[][] impact;
2849 
ImpTabPair(byte[][] table1, byte[][] table2, short[] act1, short[] act2)2850         ImpTabPair(byte[][] table1, byte[][] table2,
2851                    short[] act1, short[] act2) {
2852             imptab = new byte[][][] {table1, table2};
2853             impact = new short[][] {act1, act2};
2854         }
2855     }
2856 
2857     /*********************************************************************/
2858     /*                                                                   */
2859     /*      LEVELS  STATE  TABLES                                        */
2860     /*                                                                   */
2861     /* In all levels state tables,                                       */
2862     /*      - state 0 is the initial state                               */
2863     /*      - the Res column is the increment to add to the text level   */
2864     /*        for this property sequence.                                */
2865     /*                                                                   */
2866     /* The impact arrays for each table of a pair map the local action   */
2867     /* numbers of the table to the total list of actions. For instance,  */
2868     /* action 2 in a given table corresponds to the action number which  */
2869     /* appears in entry [2] of the impact array for that table.          */
2870     /* The first entry of all impact arrays must be 0.                   */
2871     /*                                                                   */
2872     /* Action 1: init conditional sequence                               */
2873     /*        2: prepend conditional sequence to current sequence        */
2874     /*        3: set ON sequence to new level - 1                        */
2875     /*        4: init EN/AN/ON sequence                                  */
2876     /*        5: fix EN/AN/ON sequence followed by R                     */
2877     /*        6: set previous level sequence to level 2                  */
2878     /*                                                                   */
2879     /* Notes:                                                            */
2880     /*  1) These tables are used in processPropertySeq(). The input      */
2881     /*     is property sequences as determined by resolveImplicitLevels. */
2882     /*  2) Most such property sequences are processed immediately        */
2883     /*     (levels are assigned).                                        */
2884     /*  3) However, some sequences cannot be assigned a final level till */
2885     /*     one or more following sequences are received. For instance,   */
2886     /*     ON following an R sequence within an even-level paragraph.    */
2887     /*     If the following sequence is R, the ON sequence will be       */
2888     /*     assigned basic run level+1, and so will the R sequence.       */
2889     /*  4) S is generally handled like ON, since its level will be fixed */
2890     /*     to paragraph level in adjustWSLevels().                       */
2891     /*                                                                   */
2892 
2893     private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
2894         /*  In this table, conditional sequences receive the lower possible level
2895             until proven otherwise.
2896         */
2897     {
         /* Cell layout: the high nibble is the local action number (translated
          * through the action map of the pair), the low nibble is the next state;
          * e.g. 0x14 = action 1, next state 4. Res is the level increment. */
2898         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2899         /* 0 : init       */ {     0,     1,     0,     2,     0,     0,     0,  0 },
2900         /* 1 : R          */ {     0,     1,     3,     3,  0x14,  0x14,     0,  1 },
2901         /* 2 : AN         */ {     0,     1,     0,     2,  0x15,  0x15,     0,  2 },
2902         /* 3 : R+EN/AN    */ {     0,     1,     3,     3,  0x14,  0x14,     0,  2 },
2903         /* 4 : R+ON       */ {     0,  0x21,  0x33,  0x33,     4,     4,     0,  0 },
2904         /* 5 : AN+ON      */ {     0,  0x21,     0,  0x32,     5,     5,     0,  0 }
2905     };
2906 
2907     private static final byte impTabR_DEFAULT[][] = /* Odd  paragraph level */
2908         /*  In this table, conditional sequences receive the lower possible level
2909             until proven otherwise.
2910         */
2911     {
2912         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2913         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
2914         /* 1 : L          */ {     1,     0,     1,     3,  0x14,  0x14,     0,  1 },
2915         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
2916         /* 3 : L+AN       */ {     1,     0,     1,     3,     5,     5,     0,  1 },
2917         /* 4 : L+ON       */ {  0x21,     0,  0x21,     3,     4,     4,     0,  0 },
2918         /* 5 : L+AN+ON    */ {     1,     0,     1,     3,     5,     5,     0,  0 }
2919     };
2920 
         /* Identity action map: local action n of a table is global action n (n = 0..4). */
2921     private static final short[] impAct0 = {0,1,2,3,4};
2922 
         /* Default pair: even-level table first, odd-level table second, both with impAct0. */
2923     private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
2924             impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
2925 
2926     private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
2927         /* In this table, conditional sequences receive the lower possible
2928            level until proven otherwise.
2929         */
         /* Cells: high nibble = local action, low nibble = next state; Res = level increment. */
2930         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2931         /* 0 : init       */ {     0,     2,  0x11,  0x11,     0,     0,     0,  0 },
2932         /* 1 : L+EN/AN    */ {     0,  0x42,     1,     1,     0,     0,     0,  0 },
2933         /* 2 : R          */ {     0,     2,     4,     4,  0x13,  0x13,     0,  1 },
2934         /* 3 : R+ON       */ {     0,  0x22,  0x34,  0x34,     3,     3,     0,  0 },
2935         /* 4 : R+EN/AN    */ {     0,     2,     4,     4,  0x13,  0x13,     0,  2 }
2936     };
         /* Only the even-level table is specific to this mode; the odd-level side
          * reuses impTabR_DEFAULT. Both sides use the identity action map impAct0. */
2937     private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
2938             impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
2939 
2940     private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
2941         /* In this table, EN/AN+ON sequences receive levels as if associated with R
2942            until proven that there is L or sor/eor on both sides. AN is handled like EN.
2943         */
         /* Cells: high nibble = local action, low nibble = next state; Res = level increment. */
2944         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2945         /* 0 init         */ {     0,     3,  0x11,  0x11,     0,     0,     0,  0 },
2946         /* 1 EN/AN        */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  2 },
2947         /* 2 EN/AN+ON     */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  1 },
2948         /* 3 R            */ {     0,     3,     5,     5,  0x14,     0,     0,  1 },
2949         /* 4 R+ON         */ {  0x20,     3,     5,     5,     4,  0x20,  0x20,  1 },
2950         /* 5 R+EN/AN      */ {     0,     3,     5,     5,  0x14,     0,     0,  2 }
2951     };
2952     private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
2953         /*  In this table, EN/AN+ON sequences receive levels as if associated with R
2954             until proven that there is L on both sides. AN is handled like EN.
2955         */
2956         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2957         /* 0 init         */ {     2,     0,     1,     1,     0,     0,     0,  0 },
2958         /* 1 EN/AN        */ {     2,     0,     1,     1,     0,     0,     0,  1 },
2959         /* 2 L            */ {     2,     0,  0x14,  0x14,  0x13,     0,     0,  1 },
2960         /* 3 L+ON         */ {  0x22,     0,     4,     4,     3,     0,     0,  0 },
2961         /* 4 L+EN/AN      */ {  0x22,     0,     4,     4,     3,     0,     0,  1 }
2962     };
         /* Even-level table first, odd-level table second; both use the identity action map. */
2963     private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
2964             ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
2965                        impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
2966 
2967     private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
2968         /* This table is identical to the Default LTR table except that EN and AN
2969            are handled like L.
2970         */
         /* Cells: high nibble = local action, low nibble = next state; Res = level increment.
          * NOTE(review): starting from state 0 only states 0, 1 and 4 are reachable here
          * (no reachable cell leads to states 2, 3 or 5); presumably those rows are kept
          * to mirror the row layout of the default table -- confirm before pruning. */
2971         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2972         /* 0 : init       */ {     0,     1,     0,     0,     0,     0,     0,  0 },
2973         /* 1 : R          */ {     0,     1,     0,     0,  0x14,  0x14,     0,  1 },
2974         /* 2 : AN         */ {     0,     1,     0,     0,  0x15,  0x15,     0,  2 },
2975         /* 3 : R+EN/AN    */ {     0,     1,     0,     0,  0x14,  0x14,     0,  2 },
2976         /* 4 : R+ON       */ {  0x20,     1,  0x20,  0x20,     4,     4,  0x20,  1 },
2977         /* 5 : AN+ON      */ {  0x20,     1,  0x20,  0x20,     5,     5,  0x20,  1 }
2978     };
2979     private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
2980         /* This table is identical to the Default RTL table except that EN and AN
2981            are handled like L.
2982         */
2983         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2984         /* 0 : init       */ {     1,     0,     1,     1,     0,     0,     0,  0 },
2985         /* 1 : L          */ {     1,     0,     1,     1,  0x14,  0x14,     0,  1 },
2986         /* 2 : EN/AN      */ {     1,     0,     1,     1,     0,     0,     0,  1 },
2987         /* 3 : L+AN       */ {     1,     0,     1,     1,     5,     5,     0,  1 },
2988         /* 4 : L+ON       */ {  0x21,     0,  0x21,  0x21,     4,     4,     0,  0 },
2989         /* 5 : L+AN+ON    */ {     1,     0,     1,     1,     5,     5,     0,  0 }
2990     };
         /* Even-level table first, odd-level table second; both use the identity action map. */
2991     private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
2992             (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
2993              impAct0, impAct0);
2994 
2995     private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = {  /* Odd  paragraph level */
2996         /*  In this table, conditional sequences receive the lower possible level
2997             until proven otherwise.
2998         */
         /* Cells: high nibble = local action (translated through impAct1),
          * low nibble = next state; Res = level increment. */
2999         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
3000         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
3001         /* 1 : L          */ {     1,     0,     1,     2,  0x13,  0x13,     0,  1 },
3002         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
3003         /* 3 : L+ON       */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  0 },
3004         /* 4 : L+ON+AN    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  3 },
3005         /* 5 : L+AN+ON    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  2 },
3006         /* 6 : L+ON+EN    */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  1 }
3007     };
         /* Action map for the table above: local actions 2 and 3 are global actions 13 and 14. */
3008     private static final short[] impAct1 = {0,1,13,14};
         /* The even-level side reuses impTabL_DEFAULT with the identity map impAct0. */
3009     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
3010             impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
3011 
3012     private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
3013         /* The case handled in this table is (visually):  R EN L
3014          */
         /* Cells: high nibble = local action (translated through impAct2),
          * low nibble = next state; Res = level increment. */
3015         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
3016         /* 0 : init       */ {     0,  0x63,     0,     1,     0,     0,     0,  0 },
3017         /* 1 : L+AN       */ {     0,  0x63,     0,     1,  0x12,  0x30,     0,  4 },
3018         /* 2 : L+AN+ON    */ {  0x20,  0x63,  0x20,     1,     2,  0x30,  0x20,  3 },
3019         /* 3 : R          */ {     0,  0x63,  0x55,  0x56,  0x14,  0x30,     0,  3 },
3020         /* 4 : R+ON       */ {  0x30,  0x43,  0x55,  0x56,     4,  0x30,  0x30,  3 },
3021         /* 5 : R+EN       */ {  0x30,  0x43,     5,  0x56,  0x14,  0x30,  0x30,  4 },
3022         /* 6 : R+AN       */ {  0x30,  0x43,  0x55,     6,  0x14,  0x30,  0x30,  4 }
3023     };
3024     private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
3025         /* The cases handled in this table are (visually):  R EN L
3026                                                             R L AN L
3027         */
         /* Cells: high nibble = local action (translated through impAct3),
          * low nibble = next state; Res = level increment. */
3028         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
3029         /* 0 : init       */ {  0x13,     0,     1,     1,     0,     0,     0,  0 },
3030         /* 1 : R+EN/AN    */ {  0x23,     0,     1,     1,     2,  0x40,     0,  1 },
3031         /* 2 : R+EN/AN+ON */ {  0x23,     0,     1,     1,     2,  0x40,     0,  0 },
3032         /* 3 : L          */ {     3,     0,     3,  0x36,  0x14,  0x40,     0,  1 },
3033         /* 4 : L+ON       */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  0 },
3034         /* 5 : L+ON+EN    */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  1 },
3035         /* 6 : L+AN       */ {  0x53,  0x40,     6,     6,     4,  0x40,  0x40,  3 }
3036     };
         /* Action maps: impAct2 turns local actions 2..6 into global actions 2, 5, 6, 7, 8;
          * impAct3 turns local actions 2..5 into global actions 9, 10, 11, 12. */
3037     private static final short[] impAct2 = {0,1,2,5,6,7,8};
3038     private static final short[] impAct3 = {0,1,9,10,11,12};
         /* Even-level table with impAct2, odd-level table with impAct3. */
3039     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
3040             new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
3041                            impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
3042 
3043     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(  /* built only from tables shared with other pairs */
3044             impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
3045 
3046     private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
3047         /*  The case handled in this table is (visually):  R EN L
3048         */
         /* Cells: high nibble = local action (translated through impAct2),
          * low nibble = next state; Res = level increment. */
3049         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
3050         /* 0 : init       */ {     0,  0x62,     1,     1,     0,     0,     0,  0 },
3051         /* 1 : L+EN/AN    */ {     0,  0x62,     1,     1,     0,  0x30,     0,  4 },
3052         /* 2 : R          */ {     0,  0x62,  0x54,  0x54,  0x13,  0x30,     0,  3 },
3053         /* 3 : R+ON       */ {  0x30,  0x42,  0x54,  0x54,     3,  0x30,  0x30,  3 },
3054         /* 4 : R+EN/AN    */ {  0x30,  0x42,     4,     4,  0x13,  0x30,  0x30,  4 }
3055     };
         /* The odd-level side reuses impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS and its map impAct3. */
3056     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
3057             ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
3058                        impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
3059 
3060     private static class LevState {
3061         byte[][] impTab;                /* level table pointer          */
3062         short[] impAct;                 /* action map array             */
3063         int startON;                    /* start of ON sequence         */
3064         int startL2EN;                  /* start of level 2 sequence    */
3065         int lastStrongRTL;              /* index of last found R or AL  */
3066         int runStart;                   /* start position of the run    */
3067         short state;                    /* current state                */
3068         byte runLevel;                  /* run level before implicit solving */
3069     }
3070 
3071     /*------------------------------------------------------------------------*/
3072 
3073     static final int FIRSTALLOC = 10;  /* size of the first insertPoints.points array allocated by addPoint() */
3074     /*
3075      *  param pos:     position where to insert
3076      *  param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
3077      */
addPoint(int pos, int flag)3078     private void addPoint(int pos, int flag)
3079     {
3080         Point point = new Point();
3081 
3082         int len = insertPoints.points.length;
3083         if (len == 0) {
3084             insertPoints.points = new Point[FIRSTALLOC];
3085             len = FIRSTALLOC;
3086         }
3087         if (insertPoints.size >= len) { /* no room for new point */
3088             Point[] savePoints = insertPoints.points;
3089             insertPoints.points = new Point[len * 2];
3090             System.arraycopy(savePoints, 0, insertPoints.points, 0, len);
3091         }
3092         point.pos = pos;
3093         point.flag = flag;
3094         insertPoints.points[insertPoints.size] = point;
3095         insertPoints.size++;
3096     }
3097 
setLevelsOutsideIsolates(int start, int limit, byte level)3098     private void setLevelsOutsideIsolates(int start, int limit, byte level)
3099     {
3100         byte dirProp;
3101         int  isolateCount = 0, k;
3102         for (k = start; k < limit; k++) {
3103             dirProp = dirProps[k];
3104             if (dirProp == PDI)
3105                 isolateCount--;
3106             if (isolateCount == 0)
3107                 levels[k] = level;
3108             if (dirProp == LRI || dirProp == RLI)
3109                 isolateCount++;
3110         }
3111     }
3112 
3113     /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
3114 
3115     /*
3116      * This implementation of the (Wn) rules applies all rules in one pass.
3117      * In order to do so, it needs a look-ahead of typically 1 character
3118      * (except for W5: sequences of ET) and keeps track of changes
3119      * in a rule Wp that affect a later Wq (p<q).
3120      *
3121      * The (Nn) and (In) rules are also performed in that same single loop,
3122      * but effectively one iteration behind for white space.
3123      *
3124      * Since all implicit rules are performed in one step, it is not necessary
3125      * to actually store the intermediate directional properties in dirProps[].
3126      */
3127 
processPropertySeq(LevState levState, short _prop, int start, int limit)3128     private void processPropertySeq(LevState levState, short _prop,
3129             int start, int limit) {
3130         byte cell;
3131         byte[][] impTab = levState.impTab;
3132         short[] impAct = levState.impAct;
3133         short oldStateSeq,actionSeq;
3134         byte level, addLevel;
3135         int start0, k;
3136 
3137         start0 = start;                 /* save original start position */
3138         oldStateSeq = levState.state;
3139         cell = impTab[oldStateSeq][_prop];
3140         levState.state = GetState(cell);        /* isolate the new state */
3141         actionSeq = impAct[GetAction(cell)];    /* isolate the action */
3142         addLevel = impTab[levState.state][IMPTABLEVELS_RES];
3143 
3144         if (actionSeq != 0) {
3145             switch (actionSeq) {
3146             case 1:                     /* init ON seq */
3147                 levState.startON = start0;
3148                 break;
3149 
3150             case 2:                     /* prepend ON seq to current seq */
3151                 start = levState.startON;
3152                 break;
3153 
3154             case 3:                     /* EN/AN after R+ON */
3155                 level = (byte)(levState.runLevel + 1);
3156                 setLevelsOutsideIsolates(levState.startON, start0, level);
3157                 break;
3158 
3159             case 4:                     /* EN/AN before R for NUMBERS_SPECIAL */
3160                 level = (byte)(levState.runLevel + 2);
3161                 setLevelsOutsideIsolates(levState.startON, start0, level);
3162                 break;
3163 
3164             case 5:                     /* L or S after possible relevant EN/AN */
3165                 /* check if we had EN after R/AL */
3166                 if (levState.startL2EN >= 0) {
3167                     addPoint(levState.startL2EN, LRM_BEFORE);
3168                 }
3169                 levState.startL2EN = -1;  /* not within previous if since could also be -2 */
3170                 /* check if we had any relevant EN/AN after R/AL */
3171                 if ((insertPoints.points.length == 0) ||
3172                         (insertPoints.size <= insertPoints.confirmed)) {
3173                     /* nothing, just clean up */
3174                     levState.lastStrongRTL = -1;
3175                     /* check if we have a pending conditional segment */
3176                     level = impTab[oldStateSeq][IMPTABLEVELS_RES];
3177                     if ((level & 1) != 0 && levState.startON > 0) { /* after ON */
3178                         start = levState.startON;   /* reset to basic run level */
3179                     }
3180                     if (_prop == _S) {              /* add LRM before S */
3181                         addPoint(start0, LRM_BEFORE);
3182                         insertPoints.confirmed = insertPoints.size;
3183                     }
3184                     break;
3185                 }
3186                 /* reset previous RTL cont to level for LTR text */
3187                 for (k = levState.lastStrongRTL + 1; k < start0; k++) {
3188                     /* reset odd level, leave runLevel+2 as is */
3189                     levels[k] = (byte)((levels[k] - 2) & ~1);
3190                 }
3191                 /* mark insert points as confirmed */
3192                 insertPoints.confirmed = insertPoints.size;
3193                 levState.lastStrongRTL = -1;
3194                 if (_prop == _S) {           /* add LRM before S */
3195                     addPoint(start0, LRM_BEFORE);
3196                     insertPoints.confirmed = insertPoints.size;
3197                 }
3198                 break;
3199 
3200             case 6:                     /* R/AL after possible relevant EN/AN */
3201                 /* just clean up */
3202                 if (insertPoints.points.length > 0)
3203                     /* remove all non confirmed insert points */
3204                     insertPoints.size = insertPoints.confirmed;
3205                 levState.startON = -1;
3206                 levState.startL2EN = -1;
3207                 levState.lastStrongRTL = limit - 1;
3208                 break;
3209 
3210             case 7:                     /* EN/AN after R/AL + possible cont */
3211                 /* check for real AN */
3212                 if ((_prop == _AN) && (dirProps[start0] == AN) &&
3213                 (reorderingMode != REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
3214                 {
3215                     /* real AN */
3216                     if (levState.startL2EN == -1) { /* if no relevant EN already found */
3217                         /* just note the rightmost digit as a strong RTL */
3218                         levState.lastStrongRTL = limit - 1;
3219                         break;
3220                     }
3221                     if (levState.startL2EN >= 0)  { /* after EN, no AN */
3222                         addPoint(levState.startL2EN, LRM_BEFORE);
3223                         levState.startL2EN = -2;
3224                     }
3225                     /* note AN */
3226                     addPoint(start0, LRM_BEFORE);
3227                     break;
3228                 }
3229                 /* if first EN/AN after R/AL */
3230                 if (levState.startL2EN == -1) {
3231                     levState.startL2EN = start0;
3232                 }
3233                 break;
3234 
3235             case 8:                     /* note location of latest R/AL */
3236                 levState.lastStrongRTL = limit - 1;
3237                 levState.startON = -1;
3238                 break;
3239 
3240             case 9:                     /* L after R+ON/EN/AN */
3241                 /* include possible adjacent number on the left */
3242                 for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {
3243                 }
3244                 if (k >= 0) {
3245                     addPoint(k, RLM_BEFORE);    /* add RLM before */
3246                     insertPoints.confirmed = insertPoints.size; /* confirm it */
3247                 }
3248                 levState.startON = start0;
3249                 break;
3250 
3251             case 10:                    /* AN after L */
3252                 /* AN numbers between L text on both sides may be trouble. */
3253                 /* tentatively bracket with LRMs; will be confirmed if followed by L */
3254                 addPoint(start0, LRM_BEFORE);   /* add LRM before */
3255                 addPoint(start0, LRM_AFTER);    /* add LRM after  */
3256                 break;
3257 
3258             case 11:                    /* R after L+ON/EN/AN */
3259                 /* false alert, infirm LRMs around previous AN */
3260                 insertPoints.size=insertPoints.confirmed;
3261                 if (_prop == _S) {          /* add RLM before S */
3262                     addPoint(start0, RLM_BEFORE);
3263                     insertPoints.confirmed = insertPoints.size;
3264                 }
3265                 break;
3266 
3267             case 12:                    /* L after L+ON/AN */
3268                 level = (byte)(levState.runLevel + addLevel);
3269                 for (k=levState.startON; k < start0; k++) {
3270                     if (levels[k] < level) {
3271                         levels[k] = level;
3272                     }
3273                 }
3274                 insertPoints.confirmed = insertPoints.size;   /* confirm inserts */
3275                 levState.startON = start0;
3276                 break;
3277 
3278             case 13:                    /* L after L+ON+EN/AN/ON */
3279                 level = levState.runLevel;
3280                 for (k = start0-1; k >= levState.startON; k--) {
3281                     if (levels[k] == level+3) {
3282                         while (levels[k] == level+3) {
3283                             levels[k--] -= 2;
3284                         }
3285                         while (levels[k] == level) {
3286                             k--;
3287                         }
3288                     }
3289                     if (levels[k] == level+2) {
3290                         levels[k] = level;
3291                         continue;
3292                     }
3293                     levels[k] = (byte)(level+1);
3294                 }
3295                 break;
3296 
3297             case 14:                    /* R after L+ON+EN/AN/ON */
3298                 level = (byte)(levState.runLevel+1);
3299                 for (k = start0-1; k >= levState.startON; k--) {
3300                     if (levels[k] > level) {
3301                         levels[k] -= 2;
3302                     }
3303                 }
3304                 break;
3305 
3306             default:                        /* we should never get here */
3307                 throw new IllegalStateException("Internal ICU error in processPropertySeq");
3308             }
3309         }
3310         if ((addLevel) != 0 || (start < start0)) {
3311             level = (byte)(levState.runLevel + addLevel);
3312             if (start >= levState.runStart) {
3313                 for (k = start; k < limit; k++) {
3314                     levels[k] = level;
3315                 }
3316             } else {
3317                 setLevelsOutsideIsolates(start, limit, level);
3318             }
3319         }
3320     }
3321 
3322     /**
3323      * Returns the directionality of the last strong character at the end of the prologue, if any.
3324      * Requires prologue!=null.
3325      */
lastL_R_AL()3326     private byte lastL_R_AL() {
3327         for (int i = prologue.length(); i > 0; ) {
3328             int uchar = prologue.codePointBefore(i);
3329             i -= Character.charCount(uchar);
3330             byte dirProp = (byte)getCustomizedClass(uchar);
3331             if (dirProp == L) {
3332                 return _L;
3333             }
3334             if (dirProp == R || dirProp == AL) {
3335                 return _R;
3336             }
3337             if(dirProp == B) {
3338                 return _ON;
3339             }
3340         }
3341         return _ON;
3342     }
3343 
3344     /**
3345      * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
3346      * Requires epilogue!=null.
3347      */
firstL_R_AL_EN_AN()3348     private byte firstL_R_AL_EN_AN() {
3349         for (int i = 0; i < epilogue.length(); ) {
3350             int uchar = epilogue.codePointAt(i);
3351             i += Character.charCount(uchar);
3352             byte dirProp = (byte)getCustomizedClass(uchar);
3353             if (dirProp == L) {
3354                 return _L;
3355             }
3356             if (dirProp == R || dirProp == AL) {
3357                 return _R;
3358             }
3359             if (dirProp == EN) {
3360                 return _EN;
3361             }
3362             if (dirProp == AN) {
3363                 return _AN;
3364             }
3365         }
3366         return _ON;
3367     }
3368 
resolveImplicitLevels(int start, int limit, short sor, short eor)3369     private void resolveImplicitLevels(int start, int limit, short sor, short eor)
3370     {
3371         byte dirProp;
3372         LevState levState = new LevState();
3373         int i, start1, start2;
3374         short oldStateImp, stateImp, actionImp;
3375         short gprop, resProp, cell;
3376         boolean inverseRTL;
3377         short nextStrongProp = R;
3378         int nextStrongPos = -1;
3379 
3380         /* check for RTL inverse Bidi mode */
3381         /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
3382          * loop on the text characters from end to start.
3383          * This would need a different properties state table (at least different
3384          * actions) and different levels state tables (maybe very similar to the
3385          * LTR corresponding ones.
3386          */
3387         inverseRTL=((start<lastArabicPos) && ((GetParaLevelAt(start) & 1)>0) &&
3388                     (reorderingMode == REORDER_INVERSE_LIKE_DIRECT  ||
3389                      reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
3390         /* initialize for property and levels state table */
3391         levState.startL2EN = -1;        /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
3392         levState.lastStrongRTL = -1;    /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
3393         levState.runStart = start;
3394         levState.runLevel = levels[start];
3395         levState.impTab = impTabPair.imptab[levState.runLevel & 1];
3396         levState.impAct = impTabPair.impact[levState.runLevel & 1];
3397         if (start == 0 && prologue != null) {
3398             byte lastStrong = lastL_R_AL();
3399             if (lastStrong != _ON) {
3400                 sor = lastStrong;
3401             }
3402         }
3403         /* The isolates[] entries contain enough information to
3404            resume the bidi algorithm in the same state as it was
3405            when it was interrupted by an isolate sequence. */
3406         if (dirProps[start] == PDI) {
3407             levState.startON = isolates[isolateCount].startON;
3408             start1 = isolates[isolateCount].start1;
3409             stateImp = isolates[isolateCount].stateImp;
3410             levState.state = isolates[isolateCount].state;
3411             isolateCount--;
3412         } else {
3413             levState.startON = -1;
3414             start1 = start;
3415             if (dirProps[start] == NSM)
3416                 stateImp = (short)(1 + sor);
3417             else
3418                 stateImp = 0;
3419             levState.state = 0;
3420             processPropertySeq(levState, sor, start, start);
3421         }
3422         start2 = start;                 /* to make the Java compiler happy */
3423 
3424         for (i = start; i <= limit; i++) {
3425             if (i >= limit) {
3426                 int k;
3427                 for (k = limit - 1;
3428                      k > start &&
3429                          (DirPropFlag(dirProps[k]) & MASK_BN_EXPLICIT) != 0;
3430                      k--);
3431                 dirProp = dirProps[k];
3432                 if (dirProp == LRI || dirProp == RLI)
3433                     break;  /* no forced closing for sequence ending with LRI/RLI */
3434                 gprop = eor;
3435             } else {
3436                 byte prop, prop1;
3437                 prop = dirProps[i];
3438                 if (prop == B)
3439                     isolateCount = -1;  /* current isolates stack entry == none */
3440                 if (inverseRTL) {
3441                     if (prop == AL) {
3442                         /* AL before EN does not make it AN */
3443                         prop = R;
3444                     } else if (prop == EN) {
3445                         if (nextStrongPos <= i) {
3446                             /* look for next strong char (L/R/AL) */
3447                             int j;
3448                             nextStrongProp = R;     /* set default */
3449                             nextStrongPos = limit;
3450                             for (j = i+1; j < limit; j++) {
3451                                 prop1 = dirProps[j];
3452                                 if (prop1 == L || prop1 == R || prop1 == AL) {
3453                                     nextStrongProp = prop1;
3454                                     nextStrongPos = j;
3455                                     break;
3456                                 }
3457                             }
3458                         }
3459                         if (nextStrongProp == AL) {
3460                             prop = AN;
3461                         }
3462                     }
3463                 }
3464                 gprop = groupProp[prop];
3465             }
3466             oldStateImp = stateImp;
3467             cell = impTabProps[oldStateImp][gprop];
3468             stateImp = GetStateProps(cell);     /* isolate the new state */
3469             actionImp = GetActionProps(cell);   /* isolate the action */
3470             if ((i == limit) && (actionImp == 0)) {
3471                 /* there is an unprocessed sequence if its property == eor   */
3472                 actionImp = 1;                  /* process the last sequence */
3473             }
3474             if (actionImp != 0) {
3475                 resProp = impTabProps[oldStateImp][IMPTABPROPS_RES];
3476                 switch (actionImp) {
3477                 case 1:             /* process current seq1, init new seq1 */
3478                     processPropertySeq(levState, resProp, start1, i);
3479                     start1 = i;
3480                     break;
3481                 case 2:             /* init new seq2 */
3482                     start2 = i;
3483                     break;
3484                 case 3:             /* process seq1, process seq2, init new seq1 */
3485                     processPropertySeq(levState, resProp, start1, start2);
3486                     processPropertySeq(levState, _ON, start2, i);
3487                     start1 = i;
3488                     break;
3489                 case 4:             /* process seq1, set seq1=seq2, init new seq2 */
3490                     processPropertySeq(levState, resProp, start1, start2);
3491                     start1 = start2;
3492                     start2 = i;
3493                     break;
3494                 default:            /* we should never get here */
3495                     throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
3496                 }
3497             }
3498         }
3499 
3500         /* flush possible pending sequence, e.g. ON */
3501         if (limit == length && epilogue != null) {
3502             byte firstStrong = firstL_R_AL_EN_AN();
3503             if (firstStrong != _ON) {
3504                 eor = firstStrong;
3505             }
3506         }
3507 
3508         /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
3509         for (i = limit - 1;
3510              i > start &&
3511                  (DirPropFlag(dirProps[i]) & MASK_BN_EXPLICIT) != 0;
3512              i--);
3513         dirProp = dirProps[i];
3514         if ((dirProp == LRI || dirProp == RLI) && limit < length) {
3515             isolateCount++;
3516             if (isolates[isolateCount] == null)
3517                 isolates[isolateCount] = new Isolate();
3518             isolates[isolateCount].stateImp = stateImp;
3519             isolates[isolateCount].state = levState.state;
3520             isolates[isolateCount].start1 = start1;
3521             isolates[isolateCount].startON = levState.startON;
3522         }
3523         else
3524             processPropertySeq(levState, eor, limit, limit);
3525     }
3526 
3527     /* perform (L1) and (X9) ---------------------------------------------------- */
3528 
3529     /*
3530      * Reset the embedding levels for some non-graphic characters (L1).
3531      * This method also sets appropriate levels for BN, and
3532      * explicit embedding types that are supposed to have been removed
3533      * from the paragraph in (X9).
3534      */
adjustWSLevels()3535     private void adjustWSLevels() {
3536         int i;
3537 
3538         if ((flags & MASK_WS) != 0) {
3539             int flag;
3540             i = trailingWSStart;
3541             while (i > 0) {
3542                 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
3543                 while (i > 0 && ((flag = DirPropFlag(dirProps[--i])) & MASK_WS) != 0) {
3544                     if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
3545                         levels[i] = 0;
3546                     } else {
3547                         levels[i] = GetParaLevelAt(i);
3548                     }
3549                 }
3550 
3551                 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
3552                 /* here, i+1 is guaranteed to be <length */
3553                 while (i > 0) {
3554                     flag = DirPropFlag(dirProps[--i]);
3555                     if ((flag & MASK_BN_EXPLICIT) != 0) {
3556                         levels[i] = levels[i + 1];
3557                     } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
3558                         levels[i] = 0;
3559                         break;
3560                     } else if ((flag & MASK_B_S) != 0){
3561                         levels[i] = GetParaLevelAt(i);
3562                         break;
3563                     }
3564                 }
3565             }
3566         }
3567     }
3568 
3569     /**
3570      * Set the context before a call to setPara().<p>
3571      *
3572      * setPara() computes the left-right directionality for a given piece
3573      * of text which is supplied as one of its arguments. Sometimes this piece
3574      * of text (the "main text") should be considered in context, because text
3575      * appearing before ("prologue") and/or after ("epilogue") the main text
3576      * may affect the result of this computation.<p>
3577      *
3578      * This function specifies the prologue and/or the epilogue for the next
3579      * call to setPara(). If successive calls to setPara()
3580      * all need specification of a context, setContext() must be called
3581      * before each call to setPara(). In other words, a context is not
3582      * "remembered" after the following successful call to setPara().<p>
3583      *
3584      * If a call to setPara() specifies DEFAULT_LTR or
3585      * DEFAULT_RTL as paraLevel and is preceded by a call to
3586      * setContext() which specifies a prologue, the paragraph level will
3587      * be computed taking in consideration the text in the prologue.<p>
3588      *
3589      * When setPara() is called without a previous call to
3590      * setContext, the main text is handled as if preceded and followed
3591      * by strong directional characters at the current paragraph level.
3592      * Calling setContext() with specification of a prologue will change
3593      * this behavior by handling the main text as if preceded by the last
3594      * strong character appearing in the prologue, if any.
3595      * Calling setContext() with specification of an epilogue will change
3596      * the behavior of setPara() by handling the main text as if followed
3597      * by the first strong character or digit appearing in the epilogue, if any.<p>
3598      *
3599      * Note 1: if <code>setContext</code> is called repeatedly without
3600      *         calling <code>setPara</code>, the earlier calls have no effect,
3601      *         only the last call will be remembered for the next call to
3602      *         <code>setPara</code>.<p>
3603      *
3604      * Note 2: calling <code>setContext(null, null)</code>
3605      *         cancels any previous setting of non-empty prologue or epilogue.
3606      *         The next call to <code>setPara()</code> will process no
3607      *         prologue or epilogue.<p>
3608      *
3609      * Note 3: users must be aware that even after setting the context
3610      *         before a call to setPara() to perform e.g. a logical to visual
3611      *         transformation, the resulting string may not be identical to what it
3612      *         would have been if all the text, including prologue and epilogue, had
3613      *         been processed together.<br>
3614      * Example (upper case letters represent RTL characters):<br>
3615      * &nbsp;&nbsp;prologue = "<code>abc DE</code>"<br>
3616      * &nbsp;&nbsp;epilogue = none<br>
3617      * &nbsp;&nbsp;main text = "<code>FGH xyz</code>"<br>
3618      * &nbsp;&nbsp;paraLevel = LTR<br>
3619      * &nbsp;&nbsp;display without prologue = "<code>HGF xyz</code>"
3620      *             ("HGF" is adjacent to "xyz")<br>
3621      * &nbsp;&nbsp;display with prologue = "<code>abc HGFED xyz</code>"
3622      *             ("HGF" is not adjacent to "xyz")<br>
3623      *
3624      * @param prologue is the text which precedes the text that
3625      *        will be specified in a coming call to setPara().
3626      *        If there is no prologue to consider,
3627      *        this parameter can be <code>null</code>.
3628      *
3629      * @param epilogue is the text which follows the text that
3630      *        will be specified in a coming call to setPara().
3631      *        If there is no epilogue to consider,
3632      *        this parameter can be <code>null</code>.
3633      *
3634      * @see #setPara
3635      */
setContext(String prologue, String epilogue)3636     public void setContext(String prologue, String epilogue) {
3637         this.prologue = prologue != null && prologue.length() > 0 ? prologue : null;
3638         this.epilogue = epilogue != null && epilogue.length() > 0 ? epilogue : null;
3639     }
3640 
setParaSuccess()3641     private void setParaSuccess() {
3642         prologue = null;                /* forget the last context */
3643         epilogue = null;
3644         paraBidi = this;                /* mark successful setPara */
3645     }
3646 
Bidi_Min(int x, int y)3647     int Bidi_Min(int x, int y) {
3648         return x < y ? x : y;
3649     }
3650 
Bidi_Abs(int x)3651     int Bidi_Abs(int x) {
3652         return x >= 0 ? x : -x;
3653     }
3654 
setParaRunsOnly(char[] parmText, byte parmParaLevel)3655     void setParaRunsOnly(char[] parmText, byte parmParaLevel) {
3656         int[] visualMap;
3657         String visualText;
3658         int saveLength, saveTrailingWSStart;
3659         byte[] saveLevels;
3660         byte saveDirection;
3661         int i, j, visualStart, logicalStart,
3662             oldRunCount, runLength, addedRuns, insertRemove,
3663             start, limit, step, indexOddBit, logicalPos,
3664             index, index1;
3665         int saveOptions;
3666 
3667         reorderingMode = REORDER_DEFAULT;
3668         int parmLength = parmText.length;
3669         if (parmLength == 0) {
3670             setPara(parmText, parmParaLevel, null);
3671             reorderingMode = REORDER_RUNS_ONLY;
3672             return;
3673         }
3674         /* obtain memory for mapping table and visual text */
3675         saveOptions = reorderingOptions;
3676         if ((saveOptions & OPTION_INSERT_MARKS) > 0) {
3677             reorderingOptions &= ~OPTION_INSERT_MARKS;
3678             reorderingOptions |= OPTION_REMOVE_CONTROLS;
3679         }
3680         parmParaLevel &= 1;             /* accept only 0 or 1 */
3681         setPara(parmText, parmParaLevel, null);
3682         /* we cannot access directly levels since it is not yet set if
3683          * direction is not MIXED
3684          */
3685         saveLevels = new byte[this.length];
3686         System.arraycopy(getLevels(), 0, saveLevels, 0, this.length);
3687         saveTrailingWSStart = trailingWSStart;
3688 
3689         /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
3690          * the visual map and the dirProps array to drive the second call
3691          * to setPara (but must make provision for possible removal of
3692          * Bidi controls.  Alternatively, only use the dirProps array via
3693          * customized classifier callback.
3694          */
3695         visualText = writeReordered(DO_MIRRORING);
3696         visualMap = getVisualMap();
3697         this.reorderingOptions = saveOptions;
3698         saveLength = this.length;
3699         saveDirection=this.direction;
3700 
3701         this.reorderingMode = REORDER_INVERSE_LIKE_DIRECT;
3702         parmParaLevel ^= 1;
3703         setPara(visualText, parmParaLevel, null);
3704         BidiLine.getRuns(this);
3705         /* check if some runs must be split, count how many splits */
3706         addedRuns = 0;
3707         oldRunCount = this.runCount;
3708         visualStart = 0;
3709         for (i = 0; i < oldRunCount; i++, visualStart += runLength) {
3710             runLength = runs[i].limit - visualStart;
3711             if (runLength < 2) {
3712                 continue;
3713             }
3714             logicalStart = runs[i].start;
3715             for (j = logicalStart+1; j < logicalStart+runLength; j++) {
3716                 index = visualMap[j];
3717                 index1 = visualMap[j-1];
3718                 if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
3719                     addedRuns++;
3720                 }
3721             }
3722         }
3723         if (addedRuns > 0) {
3724             getRunsMemory(oldRunCount + addedRuns);
3725             if (runCount == 1) {
3726                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
3727                 runsMemory[0] = runs[0];
3728             } else {
3729                 System.arraycopy(runs, 0, runsMemory, 0, runCount);
3730             }
3731             runs = runsMemory;
3732             runCount += addedRuns;
3733             for (i = oldRunCount; i < runCount; i++) {
3734                 if (runs[i] == null) {
3735                     runs[i] = new BidiRun(0, 0, (byte)0);
3736                 }
3737             }
3738         }
3739         /* split runs which are not consecutive in source text */
3740         int newI;
3741         for (i = oldRunCount-1; i >= 0; i--) {
3742             newI = i + addedRuns;
3743             runLength = i==0 ? runs[0].limit :
3744                                runs[i].limit - runs[i-1].limit;
3745             logicalStart = runs[i].start;
3746             indexOddBit = runs[i].level & 1;
3747             if (runLength < 2) {
3748                 if (addedRuns > 0) {
3749                     runs[newI].copyFrom(runs[i]);
3750                 }
3751                 logicalPos = visualMap[logicalStart];
3752                 runs[newI].start = logicalPos;
3753                 runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
3754                 continue;
3755             }
3756             if (indexOddBit > 0) {
3757                 start = logicalStart;
3758                 limit = logicalStart + runLength - 1;
3759                 step = 1;
3760             } else {
3761                 start = logicalStart + runLength - 1;
3762                 limit = logicalStart;
3763                 step = -1;
3764             }
3765             for (j = start; j != limit; j += step) {
3766                 index = visualMap[j];
3767                 index1 = visualMap[j+step];
3768                 if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
3769                     logicalPos = Bidi_Min(visualMap[start], index);
3770                     runs[newI].start = logicalPos;
3771                     runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
3772                     runs[newI].limit = runs[i].limit;
3773                     runs[i].limit -= Bidi_Abs(j - start) + 1;
3774                     insertRemove = runs[i].insertRemove & (LRM_AFTER|RLM_AFTER);
3775                     runs[newI].insertRemove = insertRemove;
3776                     runs[i].insertRemove &= ~insertRemove;
3777                     start = j + step;
3778                     addedRuns--;
3779                     newI--;
3780                 }
3781             }
3782             if (addedRuns > 0) {
3783                 runs[newI].copyFrom(runs[i]);
3784             }
3785             logicalPos = Bidi_Min(visualMap[start], visualMap[limit]);
3786             runs[newI].start = logicalPos;
3787             runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
3788         }
3789 
3790 //    cleanup1:
3791         /* restore initial paraLevel */
3792         this.paraLevel ^= 1;
3793 //    cleanup2:
3794         /* restore real text */
3795         this.text = parmText;
3796         this.length = saveLength;
3797         this.originalLength = parmLength;
3798         this.direction=saveDirection;
3799         this.levels = saveLevels;
3800         this.trailingWSStart = saveTrailingWSStart;
3801         if (runCount > 1) {
3802             this.direction = MIXED;
3803         }
3804 //    cleanup3:
3805         this.reorderingMode = REORDER_RUNS_ONLY;
3806     }
3807 
3808     /**
3809      * Perform the Unicode Bidi algorithm. It is defined in the
3810      * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>.
3811      *
3812      * <p>This method takes a piece of plain text containing one or more paragraphs,
3813      * with or without externally specified embedding levels from <i>styled</i>
3814      * text and computes the left-right-directionality of each character.</p>
3815      *
3816      * <p>If the entire text is all of the same directionality, then
3817      * the method may not perform all the steps described by the algorithm,
3818      * i.e., some levels may not be the same as if all steps were performed.
3819      * This is not relevant for unidirectional text.<br>
3820      * For example, in pure LTR text with numbers the numbers would get
3821      * a resolved level of 2 higher than the surrounding text according to
3822      * the algorithm. This implementation may set all resolved levels to
3823      * the same value in such a case.</p>
3824      *
3825      * <p>The text can be composed of multiple paragraphs. Occurrence of a block
3826      * separator in the text terminates a paragraph, and whatever comes next starts
3827      * a new paragraph. The exception to this rule is when a Carriage Return (CR)
3828      * is followed by a Line Feed (LF). Both CR and LF are block separators, but
3829      * in that case, the pair of characters is considered as terminating the
3830      * preceding paragraph, and a new paragraph will be started by a character
3831      * coming after the LF.
3832      *
3833      * <p>Although the text is passed here as a <code>String</code>, it is
3834      * stored internally as an array of characters. Therefore the
3835      * documentation will refer to indexes of the characters in the text.
3836      *
3837      * @param text contains the text that the Bidi algorithm will be performed
3838      *        on. This text can be retrieved with <code>getText()</code> or
3839      *        <code>getTextAsString</code>.<br>
3840      *
3841      * @param paraLevel specifies the default level for the text;
3842      *        it is typically 0 (LTR) or 1 (RTL).
3843      *        If the method shall determine the paragraph level from the text,
3844      *        then <code>paraLevel</code> can be set to
3845      *        either <code>LEVEL_DEFAULT_LTR</code>
3846      *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
3847      *        paragraphs, the paragraph level shall be determined separately for
3848      *        each paragraph; if a paragraph does not include any strongly typed
3849      *        character, then the desired default is used (0 for LTR or 1 for RTL).
3850      *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
3851      *        is also valid, with odd levels indicating RTL.
3852      *
3853      * @param embeddingLevels (in) may be used to preset the embedding and override levels,
3854      *        ignoring characters like LRE and PDF in the text.
3855      *        A level overrides the directional property of its corresponding
3856      *        (same index) character if the level has the
3857      *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
3858      *        Aside from that bit, it must be
3859      *        <code>paraLevel&lt;=embeddingLevels[]&lt;=MAX_EXPLICIT_LEVEL</code>,
3860      *        except that level 0 is always allowed.
3861      *        Level 0 for a paragraph separator prevents reordering of paragraphs;
3862      *        this only works reliably if <code>LEVEL_OVERRIDE</code>
3863      *        is also set for paragraph separators.
3864      *        Level 0 for other characters is treated as a wildcard
3865      *        and is lifted up to the resolved level of the surrounding paragraph.<br><br>
3866      *        <strong>Caution: </strong>A reference to this array, not a copy
3867      *        of the levels, will be stored in the <code>Bidi</code> object;
3868      *        the <code>embeddingLevels</code>
3869      *        should not be modified to avoid unexpected results on subsequent
3870      *        Bidi operations. However, the <code>setPara()</code> and
3871      *        <code>setLine()</code> methods may modify some or all of the
3872      *        levels.<br><br>
3873      *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
3874      *        have one entry for each character in <code>text</code>.
3875      *
3876      * @throws IllegalArgumentException if the values in embeddingLevels are
3877      *         not within the allowed range
3878      *
3879      * @see #LEVEL_DEFAULT_LTR
3880      * @see #LEVEL_DEFAULT_RTL
3881      * @see #LEVEL_OVERRIDE
3882      * @see #MAX_EXPLICIT_LEVEL
3883      */
setPara(String text, byte paraLevel, byte[] embeddingLevels)3884     public void setPara(String text, byte paraLevel, byte[] embeddingLevels)
3885     {
3886         if (text == null) {
3887             setPara(new char[0], paraLevel, embeddingLevels);
3888         } else {
3889             setPara(text.toCharArray(), paraLevel, embeddingLevels);
3890         }
3891     }
3892 
3893     /**
3894      * Perform the Unicode Bidi algorithm. It is defined in the
3895      * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>.
3896      *
3897      * <p>This method takes a piece of plain text containing one or more paragraphs,
3898      * with or without externally specified embedding levels from <i>styled</i>
3899      * text and computes the left-right-directionality of each character.</p>
3900      *
3901      * <p>If the entire text is all of the same directionality, then
3902      * the method may not perform all the steps described by the algorithm,
3903      * i.e., some levels may not be the same as if all steps were performed.
3904      * This is not relevant for unidirectional text.<br>
3905      * For example, in pure LTR text with numbers the numbers would get
3906      * a resolved level of 2 higher than the surrounding text according to
3907      * the algorithm. This implementation may set all resolved levels to
3908      * the same value in such a case.</p>
3909      *
3910      * <p>The text can be composed of multiple paragraphs. Occurrence of a block
3911      * separator in the text terminates a paragraph, and whatever comes next starts
3912      * a new paragraph. The exception to this rule is when a Carriage Return (CR)
3913      * is followed by a Line Feed (LF). Both CR and LF are block separators, but
3914      * in that case, the pair of characters is considered as terminating the
3915      * preceding paragraph, and a new paragraph will be started by a character
3916      * coming after the LF.
3917      *
3918      * <p>The text is stored internally as an array of characters. Therefore the
3919      * documentation will refer to indexes of the characters in the text.
3920      *
3921      * @param chars contains the text that the Bidi algorithm will be performed
3922      *        on. This text can be retrieved with <code>getText()</code> or
3923      *        <code>getTextAsString</code>.<br>
3924      *
3925      * @param paraLevel specifies the default level for the text;
3926      *        it is typically 0 (LTR) or 1 (RTL).
3927      *        If the method shall determine the paragraph level from the text,
3928      *        then <code>paraLevel</code> can be set to
3929      *        either <code>LEVEL_DEFAULT_LTR</code>
3930      *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
3931      *        paragraphs, the paragraph level shall be determined separately for
3932      *        each paragraph; if a paragraph does not include any strongly typed
3933      *        character, then the desired default is used (0 for LTR or 1 for RTL).
3934      *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
3935      *        is also valid, with odd levels indicating RTL.
3936      *
3937      * @param embeddingLevels (in) may be used to preset the embedding and
3938      *        override levels, ignoring characters like LRE and PDF in the text.
3939      *        A level overrides the directional property of its corresponding
3940      *        (same index) character if the level has the
3941      *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
3942      *        Aside from that bit, it must be
3943      *        <code>paraLevel&lt;=embeddingLevels[]&lt;=MAX_EXPLICIT_LEVEL</code>,
3944      *        except that level 0 is always allowed.
3945      *        Level 0 for a paragraph separator prevents reordering of paragraphs;
3946      *        this only works reliably if <code>LEVEL_OVERRIDE</code>
3947      *        is also set for paragraph separators.
3948      *        Level 0 for other characters is treated as a wildcard
3949      *        and is lifted up to the resolved level of the surrounding paragraph.<br><br>
3950      *        <strong>Caution: </strong>A reference to this array, not a copy
3951      *        of the levels, will be stored in the <code>Bidi</code> object;
3952      *        the <code>embeddingLevels</code>
3953      *        should not be modified to avoid unexpected results on subsequent
3954      *        Bidi operations. However, the <code>setPara()</code> and
3955      *        <code>setLine()</code> methods may modify some or all of the
3956      *        levels.<br><br>
3957      *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
3958      *        have one entry for each character in <code>chars</code>.
3959      *
3960      * @throws IllegalArgumentException if the values in embeddingLevels are
3961      *         not within the allowed range
3962      *
3963      * @see #LEVEL_DEFAULT_LTR
3964      * @see #LEVEL_DEFAULT_RTL
3965      * @see #LEVEL_OVERRIDE
3966      * @see #MAX_EXPLICIT_LEVEL
3967      */
setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)3968     public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
3969     {
3970         /* check the argument values */
3971         if (paraLevel < LEVEL_DEFAULT_LTR) {
3972             verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
3973         }
3974         if (chars == null) {
3975             chars = new char[0];
3976         }
3977 
3978         /* special treatment for RUNS_ONLY mode */
3979         if (reorderingMode == REORDER_RUNS_ONLY) {
3980             setParaRunsOnly(chars, paraLevel);
3981             return;
3982         }
3983 
3984         /* initialize the Bidi object */
3985         this.paraBidi = null;          /* mark unfinished setPara */
3986         this.text = chars;
3987         this.length = this.originalLength = this.resultLength = text.length;
3988         this.paraLevel = paraLevel;
3989         this.direction = (byte)(paraLevel & 1);
3990         this.paraCount = 1;
3991 
3992         /* Allocate zero-length arrays instead of setting to null here; then
3993          * checks for null in various places can be eliminated.
3994          */
3995         dirProps = new byte[0];
3996         levels = new byte[0];
3997         runs = new BidiRun[0];
3998         isGoodLogicalToVisualRunsMap = false;
3999         insertPoints.size = 0;          /* clean up from last call */
4000         insertPoints.confirmed = 0;     /* clean up from last call */
4001 
4002         /*
4003          * Save the original paraLevel if contextual; otherwise, set to 0.
4004          */
4005         defaultParaLevel = IsDefaultLevel(paraLevel) ? paraLevel : 0;
4006 
4007         if (length == 0) {
4008             /*
4009              * For an empty paragraph, create a Bidi object with the paraLevel and
4010              * the flags and the direction set but without allocating zero-length arrays.
4011              * There is nothing more to do.
4012              */
4013             if (IsDefaultLevel(paraLevel)) {
4014                 this.paraLevel &= 1;
4015                 defaultParaLevel = 0;
4016             }
4017             flags = DirPropFlagLR(paraLevel);
4018             runCount = 0;
4019             paraCount = 0;
4020             setParaSuccess();
4021             return;
4022         }
4023 
4024         runCount = -1;
4025 
4026         /*
4027          * Get the directional properties,
4028          * the flags bit-set, and
4029          * determine the paragraph level if necessary.
4030          */
4031         getDirPropsMemory(length);
4032         dirProps = dirPropsMemory;
4033         getDirProps();
4034         /* the processed length may have changed if OPTION_STREAMING is set */
4035         trailingWSStart = length;  /* the levels[] will reflect the WS run */
4036 
4037         /* are explicit levels specified? */
4038         if (embeddingLevels == null) {
4039             /* no: determine explicit levels according to the (Xn) rules */
4040             getLevelsMemory(length);
4041             levels = levelsMemory;
4042             direction = resolveExplicitLevels();
4043         } else {
4044             /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */
4045             levels = embeddingLevels;
4046             direction = checkExplicitLevels();
4047         }
4048 
4049         /* allocate isolate memory */
4050         if (isolateCount > 0) {
4051             if (isolates == null || isolates.length < isolateCount)
4052                 isolates = new Isolate[isolateCount + 3];   /* keep some reserve */
4053         }
4054         isolateCount = -1;              /* current isolates stack entry == none */
4055 
4056         /*
4057          * The steps after (X9) in the Bidi algorithm are performed only if
4058          * the paragraph text has mixed directionality!
4059          */
4060         switch (direction) {
4061         case LTR:
4062             /* all levels are implicitly at paraLevel (important for getLevels()) */
4063             trailingWSStart = 0;
4064             break;
4065         case RTL:
4066             /* all levels are implicitly at paraLevel (important for getLevels()) */
4067             trailingWSStart = 0;
4068             break;
4069         default:
4070             /*
4071              *  Choose the right implicit state table
4072              */
4073             switch(reorderingMode) {
4074             case REORDER_DEFAULT:
4075                 this.impTabPair = impTab_DEFAULT;
4076                 break;
4077             case REORDER_NUMBERS_SPECIAL:
4078                 this.impTabPair = impTab_NUMBERS_SPECIAL;
4079                 break;
4080             case REORDER_GROUP_NUMBERS_WITH_R:
4081                 this.impTabPair = impTab_GROUP_NUMBERS_WITH_R;
4082                 break;
4083             case REORDER_RUNS_ONLY:
4084                 /* we should never get here */
4085                 throw new InternalError("Internal ICU error in setPara");
4086                 /* break; */
4087             case REORDER_INVERSE_NUMBERS_AS_L:
4088                 this.impTabPair = impTab_INVERSE_NUMBERS_AS_L;
4089                 break;
4090             case REORDER_INVERSE_LIKE_DIRECT:
4091                 if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
4092                     this.impTabPair = impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
4093                 } else {
4094                     this.impTabPair = impTab_INVERSE_LIKE_DIRECT;
4095                 }
4096                 break;
4097             case REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
4098                 if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
4099                     this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
4100                 } else {
4101                     this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL;
4102                 }
4103                 break;
4104             }
4105             /*
4106              * If there are no external levels specified and there
4107              * are no significant explicit level codes in the text,
4108              * then we can treat the entire paragraph as one run.
4109              * Otherwise, we need to perform the following rules on runs of
4110              * the text with the same embedding levels. (X10)
4111              * "Significant" explicit level codes are ones that actually
4112              * affect non-BN characters.
4113              * Examples for "insignificant" ones are empty embeddings
4114              * LRE-PDF, LRE-RLE-PDF-PDF, etc.
4115              */
4116             if (embeddingLevels == null && paraCount <= 1 &&
4117                 (flags & DirPropFlagMultiRuns) == 0) {
4118                 resolveImplicitLevels(0, length,
4119                         GetLRFromLevel(GetParaLevelAt(0)),
4120                         GetLRFromLevel(GetParaLevelAt(length - 1)));
4121             } else {
4122                 /* sor, eor: start and end types of same-level-run */
4123                 int start, limit = 0;
4124                 byte level, nextLevel;
4125                 short sor, eor;
4126 
4127                 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
4128                 level = GetParaLevelAt(0);
4129                 nextLevel = levels[0];
4130                 if (level < nextLevel) {
4131                     eor = GetLRFromLevel(nextLevel);
4132                 } else {
4133                     eor = GetLRFromLevel(level);
4134                 }
4135 
4136                 do {
4137                     /* determine start and limit of the run (end points just behind the run) */
4138 
4139                     /* the values for this run's start are the same as for the previous run's end */
4140                     start = limit;
4141                     level = nextLevel;
4142                     if ((start > 0) && (dirProps[start - 1] == B)) {
4143                         /* except if this is a new paragraph, then set sor = para level */
4144                         sor = GetLRFromLevel(GetParaLevelAt(start));
4145                     } else {
4146                         sor = eor;
4147                     }
4148 
4149                     /* search for the limit of this run */
4150                     while ((++limit < length) &&
4151                            ((levels[limit] == level) ||
4152                             ((DirPropFlag(dirProps[limit]) & MASK_BN_EXPLICIT) != 0))) {}
4153 
4154                     /* get the correct level of the next run */
4155                     if (limit < length) {
4156                         nextLevel = levels[limit];
4157                     } else {
4158                         nextLevel = GetParaLevelAt(length - 1);
4159                     }
4160 
4161                     /* determine eor from max(level, nextLevel); sor is last run's eor */
4162                     if (NoOverride(level) < NoOverride(nextLevel)) {
4163                         eor = GetLRFromLevel(nextLevel);
4164                     } else {
4165                         eor = GetLRFromLevel(level);
4166                     }
4167 
4168                     /* if the run consists of overridden directional types, then there
4169                        are no implicit types to be resolved */
4170                     if ((level & LEVEL_OVERRIDE) == 0) {
4171                         resolveImplicitLevels(start, limit, sor, eor);
4172                     } else {
4173                         /* remove the LEVEL_OVERRIDE flags */
4174                         do {
4175                             levels[start++] &= ~LEVEL_OVERRIDE;
4176                         } while (start < limit);
4177                     }
4178                 } while (limit  < length);
4179             }
4180 
4181             /* reset the embedding levels for some non-graphic characters (L1), (X9) */
4182             adjustWSLevels();
4183 
4184             break;
4185         }
4186         /* add RLM for inverse Bidi with contextual orientation resolving
4187          * to RTL which would not round-trip otherwise
4188          */
4189         if ((defaultParaLevel > 0) &&
4190             ((reorderingOptions & OPTION_INSERT_MARKS) != 0) &&
4191             ((reorderingMode == REORDER_INVERSE_LIKE_DIRECT) ||
4192              (reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
4193             int start, last;
4194             byte level;
4195             byte dirProp;
4196             for (int i = 0; i < paraCount; i++) {
4197                 last = paras_limit[i] - 1;
4198                 level = paras_level[i];
4199                 if (level == 0)
4200                     continue;           /* LTR paragraph */
4201                 start = i == 0 ? 0 : paras_limit[i - 1];
4202                 for (int j = last; j >= start; j--) {
4203                     dirProp = dirProps[j];
4204                     if (dirProp == L) {
4205                         if (j < last) {
4206                             while (dirProps[last] == B) {
4207                                 last--;
4208                             }
4209                         }
4210                         addPoint(last, RLM_BEFORE);
4211                         break;
4212                     }
4213                     if ((DirPropFlag(dirProp) & MASK_R_AL) != 0) {
4214                         break;
4215                     }
4216                 }
4217             }
4218         }
4219 
4220         if ((reorderingOptions & OPTION_REMOVE_CONTROLS) != 0) {
4221             resultLength -= controlCount;
4222         } else {
4223             resultLength += insertPoints.size;
4224         }
4225         setParaSuccess();
4226     }
4227 
4228     /**
4229      * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
4230      * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
4231      * version 13,
4232      * also described in The Unicode Standard, Version 4.0.<p>
4233      *
4234      * This method takes a paragraph of text and computes the
4235      * left-right-directionality of each character. The text should not
4236      * contain any Unicode block separators.<p>
4237      *
4238      * The RUN_DIRECTION attribute in the text, if present, determines the base
4239      * direction (left-to-right or right-to-left). If not present, the base
4240      * direction is computed using the Unicode Bidirectional Algorithm,
4241      * defaulting to left-to-right if there are no strong directional characters
4242      * in the text. This attribute, if present, must be applied to all the text
4243      * in the paragraph.<p>
4244      *
4245      * The BIDI_EMBEDDING attribute in the text, if present, represents
4246      * embedding level information.
4247      * Negative values indicate overrides at the absolute value of the level.
4248      * Positive values indicate embeddings. (See {@link #MAX_EXPLICIT_LEVEL}.)
4249      * Where values are zero or not defined, the base
4250      * embedding level as determined by the base direction is assumed.<p>
4251      *
4252      * The NUMERIC_SHAPING attribute in the text, if present, converts European
4253      * digits to other decimal digits before running the bidi algorithm. This
4254      * attribute, if present, must be applied to all the text in the paragraph.
4255      *
4256      * If the entire text is all of the same directionality, then
4257      * the method may not perform all the steps described by the algorithm,
4258      * i.e., some levels may not be the same as if all steps were performed.
4259      * This is not relevant for unidirectional text.<br>
4260      * For example, in pure LTR text with numbers the numbers would get
4261      * a resolved level of 2 higher than the surrounding text according to
4262      * the algorithm. This implementation may set all resolved levels to
4263      * the same value in such a case.<p>
4264      *
4265      * @param paragraph a paragraph of text with optional character and
4266      *        paragraph attribute information
4267      */
setPara(AttributedCharacterIterator paragraph)4268     public void setPara(AttributedCharacterIterator paragraph)
4269     {
4270         byte paraLvl;
4271         Boolean runDirection = (Boolean) paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
4272         if (runDirection == null) {
4273             paraLvl = LEVEL_DEFAULT_LTR;
4274         } else {
4275             paraLvl = (runDirection.equals(TextAttribute.RUN_DIRECTION_LTR)) ?
4276                         LTR : RTL;
4277         }
4278 
4279         byte[] lvls = null;
4280         int len = paragraph.getEndIndex() - paragraph.getBeginIndex();
4281         byte[] embeddingLevels = new byte[len];
4282         char[] txt = new char[len];
4283         int i = 0;
4284         char ch = paragraph.first();
4285         while (ch != AttributedCharacterIterator.DONE) {
4286             txt[i] = ch;
4287             Integer embedding = (Integer) paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
4288             if (embedding != null) {
4289                 byte level = embedding.byteValue();
4290                 if (level == 0) {
4291                     /* no-op */
4292                 } else if (level < 0) {
4293                     lvls = embeddingLevels;
4294                     embeddingLevels[i] = (byte)((0 - level) | LEVEL_OVERRIDE);
4295                 } else {
4296                     lvls = embeddingLevels;
4297                     embeddingLevels[i] = level;
4298                 }
4299             }
4300             ch = paragraph.next();
4301             ++i;
4302         }
4303 
4304         NumericShaper shaper = (NumericShaper) paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
4305         if (shaper != null) {
4306             shaper.shape(txt, 0, len);
4307         }
4308         setPara(txt, paraLvl, lvls);
4309     }
4310 
4311     /**
4312      * Specify whether block separators must be allocated level zero,
4313      * so that successive paragraphs will progress from left to right.
4314      * This method must be called before <code>setPara()</code>.
4315      * Paragraph separators (B) may appear in the text.  Setting them to level zero
4316      * means that all paragraph separators (including one possibly appearing
4317      * in the last text position) are kept in the reordered text after the text
4318      * that they follow in the source text.
4319      * When this feature is not enabled, a paragraph separator at the last
4320      * position of the text before reordering will go to the first position
4321      * of the reordered text when the paragraph level is odd.
4322      *
4323      * @param ordarParaLTR specifies whether paragraph separators (B) must
4324      * receive level 0, so that successive paragraphs progress from left to right.
4325      *
4326      * @see #setPara
4327      */
orderParagraphsLTR(boolean ordarParaLTR)4328     public void orderParagraphsLTR(boolean ordarParaLTR) {
4329         orderParagraphsLTR = ordarParaLTR;
4330     }
4331 
4332     /**
4333      * Is this <code>Bidi</code> object set to allocate level 0 to block
4334      * separators so that successive paragraphs progress from left to right?
4335      *
4336      * @return <code>true</code> if the <code>Bidi</code> object is set to
4337      *         allocate level 0 to block separators.
4338      */
isOrderParagraphsLTR()4339     public boolean isOrderParagraphsLTR() {
4340         return orderParagraphsLTR;
4341     }
4342 
4343     /**
4344      * Get the directionality of the text.
4345      *
4346      * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code>
4347      *         that indicates if the entire text
4348      *         represented by this object is unidirectional,
4349      *         and which direction, or if it is mixed-directional.
4350      *
4351      * @throws IllegalStateException if this call is not preceded by a successful
4352      *         call to <code>setPara</code> or <code>setLine</code>
4353      *
4354      * @see #LTR
4355      * @see #RTL
4356      * @see #MIXED
4357      */
getDirection()4358     public byte getDirection()
4359     {
4360         verifyValidParaOrLine();
4361         return direction;
4362     }
4363 
4364     /**
4365      * Get the text.
4366      *
4367      * @return A <code>String</code> containing the text that the
4368      *         <code>Bidi</code> object was created for.
4369      *
4370      * @throws IllegalStateException if this call is not preceded by a successful
4371      *         call to <code>setPara</code> or <code>setLine</code>
4372      *
4373      * @see #setPara
4374      * @see #setLine
4375      */
getTextAsString()4376     public String getTextAsString()
4377     {
4378         verifyValidParaOrLine();
4379         return new String(text);
4380     }
4381 
4382     /**
4383      * Get the text.
4384      *
4385      * @return A <code>char</code> array containing the text that the
4386      *         <code>Bidi</code> object was created for.
4387      *
4388      * @throws IllegalStateException if this call is not preceded by a successful
4389      *         call to <code>setPara</code> or <code>setLine</code>
4390      *
4391      * @see #setPara
4392      * @see #setLine
4393      */
getText()4394     public char[] getText()
4395     {
4396         verifyValidParaOrLine();
4397         return text;
4398     }
4399 
4400     /**
4401      * Get the length of the text.
4402      *
4403      * @return The length of the text that the <code>Bidi</code> object was
4404      *         created for.
4405      *
4406      * @throws IllegalStateException if this call is not preceded by a successful
4407      *         call to <code>setPara</code> or <code>setLine</code>
4408      */
getLength()4409     public int getLength()
4410     {
4411         verifyValidParaOrLine();
4412         return originalLength;
4413     }
4414 
4415     /**
4416      * Get the length of the source text processed by the last call to
4417      * <code>setPara()</code>. This length may be different from the length of
4418      * the source text if option <code>OPTION_STREAMING</code> has been
4419      * set.
4420      * <br>
4421      * Note that whenever the length of the text affects the execution or the
4422      * result of a method, it is the processed length which must be considered,
4423      * except for <code>setPara</code> (which receives unprocessed source text)
4424      * and <code>getLength</code> (which returns the original length of the
4425      * source text).<br>
4426      * In particular, the processed length is the one to consider in the
4427      * following cases:
4428      * <ul>
4429      * <li>maximum value of the <code>limit</code> argument of
4430      * <code>setLine</code></li>
4431      * <li>maximum value of the <code>charIndex</code> argument of
4432      * <code>getParagraph</code></li>
4433      * <li>maximum value of the <code>charIndex</code> argument of
4434      * <code>getLevelAt</code></li>
4435      * <li>number of elements in the array returned by <code>getLevels</code>
4436      * </li>
4437      * <li>maximum value of the <code>logicalStart</code> argument of
4438      * <code>getLogicalRun</code></li>
4439      * <li>maximum value of the <code>logicalIndex</code> argument of
4440      * <code>getVisualIndex</code></li>
4441      * <li>number of elements returned by <code>getLogicalMap</code></li>
4442      * <li>length of text processed by <code>writeReordered</code></li>
4443      * </ul>
4444      *
4445      * @return The length of the part of the source text processed by
4446      *         the last call to <code>setPara</code>.
4447      *
4448      * @throws IllegalStateException if this call is not preceded by a successful
4449      *         call to <code>setPara</code> or <code>setLine</code>
4450      *
4451      * @see #setPara
4452      * @see #OPTION_STREAMING
4453      */
getProcessedLength()4454     public int getProcessedLength() {
4455         verifyValidParaOrLine();
4456         return length;
4457     }
4458 
4459     /**
4460      * Get the length of the reordered text resulting from the last call to
4461      * <code>setPara()</code>. This length may be different from the length
4462      * of the source text if option <code>OPTION_INSERT_MARKS</code>
4463      * or option <code>OPTION_REMOVE_CONTROLS</code> has been set.
4464      * <br>
4465      * This resulting length is the one to consider in the following cases:
4466      * <ul>
4467      * <li>maximum value of the <code>visualIndex</code> argument of
4468      * <code>getLogicalIndex</code></li>
4469      * <li>number of elements returned by <code>getVisualMap</code></li>
4470      * </ul>
4471      * Note that this length stays identical to the source text length if
4472      * Bidi marks are inserted or removed using option bits of
4473      * <code>writeReordered</code>, or if option
4474      * <code>REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
4475      *
4476      * @return The length of the reordered text resulting from
4477      *         the last call to <code>setPara</code>.
4478      *
4479      * @throws IllegalStateException if this call is not preceded by a successful
4480      *         call to <code>setPara</code> or <code>setLine</code>
4481      *
4482      * @see #setPara
4483      * @see #OPTION_INSERT_MARKS
4484      * @see #OPTION_REMOVE_CONTROLS
4485      * @see #REORDER_INVERSE_NUMBERS_AS_L
4486      */
getResultLength()4487     public int getResultLength() {
4488         verifyValidParaOrLine();
4489         return resultLength;
4490     }
4491 
4492     /* paragraphs API methods ------------------------------------------------- */
4493 
4494     /**
4495      * Get the paragraph level of the text.
4496      *
4497      * @return The paragraph level. If there are multiple paragraphs, their
4498      *         level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or
4499      *         LEVEL_DEFAULT_RTL.  In that case, the level of the first paragraph
4500      *         is returned.
4501      *
4502      * @throws IllegalStateException if this call is not preceded by a successful
4503      *         call to <code>setPara</code> or <code>setLine</code>
4504      *
4505      * @see #LEVEL_DEFAULT_LTR
4506      * @see #LEVEL_DEFAULT_RTL
4507      * @see #getParagraph
4508      * @see #getParagraphByIndex
4509      */
getParaLevel()4510     public byte getParaLevel()
4511     {
4512         verifyValidParaOrLine();
4513         return paraLevel;
4514     }
4515 
4516     /**
4517      * Get the number of paragraphs.
4518      *
4519      * @return The number of paragraphs.
4520      *
4521      * @throws IllegalStateException if this call is not preceded by a successful
4522      *         call to <code>setPara</code> or <code>setLine</code>
4523      */
countParagraphs()4524     public int countParagraphs()
4525     {
4526         verifyValidParaOrLine();
4527         return paraCount;
4528     }
4529 
4530     /**
4531      * Get a paragraph, given the index of this paragraph.
4532      *
4533      * This method returns information about a paragraph.<p>
4534      *
4535      * @param paraIndex is the number of the paragraph, in the
4536      *        range <code>[0..countParagraphs()-1]</code>.
4537      *
4538      * @return a BidiRun object with the details of the paragraph:<br>
4539      *        <code>start</code> will receive the index of the first character
4540      *        of the paragraph in the text.<br>
4541      *        <code>limit</code> will receive the limit of the paragraph.<br>
4542      *        <code>embeddingLevel</code> will receive the level of the paragraph.
4543      *
4544      * @throws IllegalStateException if this call is not preceded by a successful
4545      *         call to <code>setPara</code> or <code>setLine</code>
4546      * @throws IllegalArgumentException if paraIndex is not in the range
4547      *        <code>[0..countParagraphs()-1]</code>
4548      *
4549      * @see ohos.global.icu.text.BidiRun
4550      */
getParagraphByIndex(int paraIndex)4551     public BidiRun getParagraphByIndex(int paraIndex)
4552     {
4553         verifyValidParaOrLine();
4554         verifyRange(paraIndex, 0, paraCount);
4555 
4556         Bidi bidi = paraBidi;             /* get Para object if Line object */
4557         int paraStart;
4558         if (paraIndex == 0) {
4559             paraStart = 0;
4560         } else {
4561             paraStart = bidi.paras_limit[paraIndex - 1];
4562         }
4563         BidiRun bidiRun = new BidiRun();
4564         bidiRun.start = paraStart;
4565         bidiRun.limit = bidi.paras_limit[paraIndex];
4566         bidiRun.level = GetParaLevelAt(paraStart);
4567         return bidiRun;
4568     }
4569 
4570     /**
4571      * Get a paragraph, given a position within the text.
4572      * This method returns information about a paragraph.<br>
4573      * Note: if the paragraph index is known, it is more efficient to
4574      * retrieve the paragraph information using getParagraphByIndex().<p>
4575      *
4576      * @param charIndex is the index of a character within the text, in the
4577      *        range <code>[0..getProcessedLength()-1]</code>.
4578      *
4579      * @return a BidiRun object with the details of the paragraph:<br>
4580      *        <code>start</code> will receive the index of the first character
4581      *        of the paragraph in the text.<br>
4582      *        <code>limit</code> will receive the limit of the paragraph.<br>
4583      *        <code>embeddingLevel</code> will receive the level of the paragraph.
4584      *
4585      * @throws IllegalStateException if this call is not preceded by a successful
4586      *         call to <code>setPara</code> or <code>setLine</code>
4587      * @throws IllegalArgumentException if charIndex is not within the legal range
4588      *
4589      * @see ohos.global.icu.text.BidiRun
4590      * @see #getParagraphByIndex
4591      * @see #getProcessedLength
4592      */
getParagraph(int charIndex)4593     public BidiRun getParagraph(int charIndex)
4594     {
4595         verifyValidParaOrLine();
4596         Bidi bidi = paraBidi;             /* get Para object if Line object */
4597         verifyRange(charIndex, 0, bidi.length);
4598         int paraIndex;
4599         for (paraIndex = 0; charIndex >= bidi.paras_limit[paraIndex]; paraIndex++) {
4600         }
4601         return getParagraphByIndex(paraIndex);
4602     }
4603 
4604     /**
4605      * Get the index of a paragraph, given a position within the text.<p>
4606      *
4607      * @param charIndex is the index of a character within the text, in the
4608      *        range <code>[0..getProcessedLength()-1]</code>.
4609      *
4610      * @return The index of the paragraph containing the specified position,
4611      *         starting from 0.
4612      *
4613      * @throws IllegalStateException if this call is not preceded by a successful
4614      *         call to <code>setPara</code> or <code>setLine</code>
4615      * @throws IllegalArgumentException if charIndex is not within the legal range
4616      *
4617      * @see ohos.global.icu.text.BidiRun
4618      * @see #getProcessedLength
4619      */
getParagraphIndex(int charIndex)4620     public int getParagraphIndex(int charIndex)
4621     {
4622         verifyValidParaOrLine();
4623         Bidi bidi = paraBidi;             /* get Para object if Line object */
4624         verifyRange(charIndex, 0, bidi.length);
4625         int paraIndex;
4626         for (paraIndex = 0; charIndex >= bidi.paras_limit[paraIndex]; paraIndex++) {
4627         }
4628         return paraIndex;
4629     }
4630 
4631     /**
4632      * Set a custom Bidi classifier used by the UBA implementation for Bidi
4633      * class determination.
4634      *
4635      * @param classifier A new custom classifier. This can be null.
4636      *
4637      * @see #getCustomClassifier
4638      */
setCustomClassifier(BidiClassifier classifier)4639     public void setCustomClassifier(BidiClassifier classifier) {
4640         this.customClassifier = classifier;
4641     }
4642 
4643     /**
4644      * Gets the current custom Bidi classifier used for Bidi class
4645      * determination.
4646      *
4647      * @return An instance of class <code>BidiClassifier</code>
4648      *
4649      * @see #setCustomClassifier
4650      */
getCustomClassifier()4651     public BidiClassifier getCustomClassifier() {
4652         return this.customClassifier;
4653     }
4654 
4655     /**
4656      * Retrieves the Bidi class for a given code point.
4657      * <p>If a <code>BidiClassifier</code> is defined and returns a value
4658      * other than <code>UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)+1</code>,
4659      * that value is used; otherwise the default class determination mechanism is invoked.
4660      *
4661      * @param c The code point to get a Bidi class for.
4662      *
4663      * @return The Bidi class for the character <code>c</code> that is in effect
4664      *         for this <code>Bidi</code> instance.
4665      *
4666      * @see BidiClassifier
4667      */
getCustomizedClass(int c)4668     public int getCustomizedClass(int c) {
4669         int dir;
4670 
4671         if (customClassifier == null ||
4672                 (dir = customClassifier.classify(c)) == Bidi.CLASS_DEFAULT) {
4673             dir = bdp.getClass(c);
4674         }
4675         if (dir >= UCharacterDirection.CHAR_DIRECTION_COUNT)
4676             dir = ON;
4677         return dir;
4678     }
4679 
4680     /**
4681      * <code>setLine()</code> returns a <code>Bidi</code> object to
4682      * contain the reordering information, especially the resolved levels,
4683      * for all the characters in a line of text. This line of text is
4684      * specified by referring to a <code>Bidi</code> object representing
4685      * this information for a piece of text containing one or more paragraphs,
4686      * and by specifying a range of indexes in this text.<p>
4687      * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
4688      *
4689      * This is used after calling <code>setPara()</code>
4690      * for a piece of text, and after line-breaking on that text.
4691      * It is not necessary if each paragraph is treated as a single line.<p>
4692      *
4693      * After line-breaking, rules (L1) and (L2) for the treatment of
4694      * trailing WS and for reordering are performed on
4695      * a <code>Bidi</code> object that represents a line.<p>
4696      *
4697      * <strong>Important: </strong>the line <code>Bidi</code> object may
4698      * reference data within the global text <code>Bidi</code> object.
4699      * You should not alter the content of the global text object until
4700      * you are finished using the line object.
4701      *
4702      * @param start is the line's first index into the text.
4703      *
4704      * @param limit is just behind the line's last index into the text
4705      *        (its last index +1).
4706      *
4707      * @return a <code>Bidi</code> object that will now represent a line of the text.
4708      *
4709      * @throws IllegalStateException if this call is not preceded by a successful
4710      *         call to <code>setPara</code>
4711      * @throws IllegalArgumentException if start and limit are not in the range
4712      *         <code>0&lt;=start&lt;limit&lt;=getProcessedLength()</code>,
4713      *         or if the specified line crosses a paragraph boundary
4714      *
4715      * @see #setPara
4716      * @see #getProcessedLength
4717      */
setLine(int start, int limit)4718     public Bidi setLine(int start, int limit)
4719     {
4720         verifyValidPara();
4721         verifyRange(start, 0, limit);
4722         verifyRange(limit, 0, length+1);
4723         if (getParagraphIndex(start) != getParagraphIndex(limit - 1)) {
4724             /* the line crosses a paragraph boundary */
4725             throw new IllegalArgumentException();
4726         }
4727         return BidiLine.setLine(this, start, limit);
4728     }
4729 
4730     /**
4731      * Get the level for one character.
4732      *
4733      * @param charIndex the index of a character.
4734      *
4735      * @return The level for the character at <code>charIndex</code>.
4736      *
4737      * @throws IllegalStateException if this call is not preceded by a successful
4738      *         call to <code>setPara</code> or <code>setLine</code>
4739      * @throws IllegalArgumentException if charIndex is not in the range
4740      *         <code>0&lt;=charIndex&lt;getProcessedLength()</code>
4741      *
4742      * @see #getProcessedLength
4743      */
getLevelAt(int charIndex)4744     public byte getLevelAt(int charIndex)
4745     {
4746         verifyValidParaOrLine();
4747         verifyRange(charIndex, 0, length);
4748         return BidiLine.getLevelAt(this, charIndex);
4749     }
4750 
4751     /**
4752      * Get an array of levels for each character.<p>
4753      *
4754      * Note that this method may allocate memory under some
4755      * circumstances, unlike <code>getLevelAt()</code>.
4756      *
4757      * @return The levels array for the text,
4758      *         or <code>null</code> if an error occurs.
4759      *
4760      * @throws IllegalStateException if this call is not preceded by a successful
4761      *         call to <code>setPara</code> or <code>setLine</code>
4762      */
getLevels()4763     public byte[] getLevels()
4764     {
4765         verifyValidParaOrLine();
4766         if (length <= 0) {
4767             return new byte[0];
4768         }
4769         return BidiLine.getLevels(this);
4770     }
4771 
4772     /**
4773      * Get a logical run.
4774      * This method returns information about a run and is used
4775      * to retrieve runs in logical order.<p>
4776      * This is especially useful for line-breaking on a paragraph.
4777      *
4778      * @param logicalPosition is a logical position within the source text.
4779      *
4780      * @return a BidiRun object filled with <code>start</code> containing
4781      *        the first character of the run, <code>limit</code> containing
4782      *        the limit of the run, and <code>embeddingLevel</code> containing
4783      *        the level of the run.
4784      *
4785      * @throws IllegalStateException if this call is not preceded by a successful
4786      *         call to <code>setPara</code> or <code>setLine</code>
4787      * @throws IllegalArgumentException if logicalPosition is not in the range
4788      *         <code>0&lt;=logicalPosition&lt;getProcessedLength()</code>
4789      *
4790      * @see ohos.global.icu.text.BidiRun
4791      * @see ohos.global.icu.text.BidiRun#getStart()
4792      * @see ohos.global.icu.text.BidiRun#getLimit()
4793      * @see ohos.global.icu.text.BidiRun#getEmbeddingLevel()
4794      */
getLogicalRun(int logicalPosition)4795     public BidiRun getLogicalRun(int logicalPosition)
4796     {
4797         verifyValidParaOrLine();
4798         verifyRange(logicalPosition, 0, length);
4799         return BidiLine.getLogicalRun(this, logicalPosition);
4800     }
4801 
4802     /**
4803      * Get the number of runs.
4804      * This method may invoke the actual reordering on the
4805      * <code>Bidi</code> object, after <code>setPara()</code>
4806      * may have resolved only the levels of the text. Therefore,
4807      * <code>countRuns()</code> may have to allocate memory,
4808      * and may throw an exception if it fails to do so.
4809      *
4810      * @return The number of runs.
4811      *
4812      * @throws IllegalStateException if this call is not preceded by a successful
4813      *         call to <code>setPara</code> or <code>setLine</code>
4814      */
countRuns()4815     public int countRuns()
4816     {
4817         verifyValidParaOrLine();
4818         BidiLine.getRuns(this);
4819         return runCount;
4820     }
4821 
4822     /**
4823      *
4824      * Get a <code>BidiRun</code> object according to its index. BidiRun methods
4825      * may be used to retrieve the run's logical start, length and level,
4826      * which can be even for an LTR run or odd for an RTL run.
4827      * In an RTL run, the character at the logical start is
4828      * visually on the right of the displayed run.
4829      * The length is the number of characters in the run.<p>
4830      * <code>countRuns()</code> is normally called
4831      * before the runs are retrieved.
4832      *
4833      * <p>
4834      *  Example:
4835      * <pre>
4836      *  Bidi bidi = new Bidi();
4837      *  String text = "abc 123 DEFG xyz";
4838      *  bidi.setPara(text, Bidi.RTL, null);
4839      *  int i, count=bidi.countRuns(), logicalStart, visualIndex=0, length;
4840      *  BidiRun run;
4841      *  for (i = 0; i &lt; count; ++i) {
4842      *      run = bidi.getVisualRun(i);
4843      *      logicalStart = run.getStart();
4844      *      length = run.getLength();
4845      *      if (Bidi.LTR == run.getEmbeddingLevel()) {
4846      *          do { // LTR
4847      *              show_char(text.charAt(logicalStart++), visualIndex++);
4848      *          } while (--length &gt; 0);
4849      *      } else {
4850      *          logicalStart += length;  // logicalLimit
4851      *          do { // RTL
4852      *              show_char(text.charAt(--logicalStart), visualIndex++);
4853      *          } while (--length &gt; 0);
4854      *      }
4855      *  }
4856      * </pre>
4857      * <p>
4858      * Note that in right-to-left runs, code like this places
4859      * second surrogates before first ones (which is generally a bad idea)
4860      * and combining characters before base characters.
4861      * <p>
4862      * Use of <code>{@link #writeReordered}</code>, optionally with the
4863      * <code>{@link #KEEP_BASE_COMBINING}</code> option, can be considered in
4864      * order to avoid these issues.
4865      *
4866      * @param runIndex is the number of the run in visual order, in the
4867      *        range <code>[0..countRuns()-1]</code>.
4868      *
4869      * @return a BidiRun object containing the details of the run. The
4870      *         directionality of the run is
4871      *         <code>LTR==0</code> or <code>RTL==1</code>,
4872      *         never <code>MIXED</code>.
4873      *
4874      * @throws IllegalStateException if this call is not preceded by a successful
4875      *         call to <code>setPara</code> or <code>setLine</code>
4876      * @throws IllegalArgumentException if <code>runIndex</code> is not in
4877      *         the range <code>0&lt;=runIndex&lt;countRuns()</code>
4878      *
4879      * @see #countRuns()
4880      * @see ohos.global.icu.text.BidiRun
4881      * @see ohos.global.icu.text.BidiRun#getStart()
4882      * @see ohos.global.icu.text.BidiRun#getLength()
4883      * @see ohos.global.icu.text.BidiRun#getEmbeddingLevel()
4884      */
getVisualRun(int runIndex)4885     public BidiRun getVisualRun(int runIndex)
4886     {
4887         verifyValidParaOrLine();
4888         BidiLine.getRuns(this);
4889         verifyRange(runIndex, 0, runCount);
4890         return BidiLine.getVisualRun(this, runIndex);
4891     }
4892 
4893     /**
4894      * Get the visual position from a logical text position.
4895      * If such a mapping is used many times on the same
4896      * <code>Bidi</code> object, then calling
4897      * <code>getLogicalMap()</code> is more efficient.
4898      * <p>
4899      * The value returned may be <code>MAP_NOWHERE</code> if there is no
4900      * visual position because the corresponding text character is a Bidi
4901      * control removed from output by the option
4902      * <code>OPTION_REMOVE_CONTROLS</code>.
4903      * <p>
4904      * When the visual output is altered by using options of
4905      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
4906      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
4907      * <code>REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
4908      * be correct. It is advised to use, when possible, reordering options
4909      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
4910      * <p>
4911      * Note that in right-to-left runs, this mapping places
4912      * second surrogates before first ones (which is generally a bad idea)
4913      * and combining characters before base characters.
4914      * Use of <code>{@link #writeReordered}</code>, optionally with the
4915      * <code>{@link #KEEP_BASE_COMBINING}</code> option can be considered instead
4916      * of using the mapping, in order to avoid these issues.
4917      *
4918      * @param logicalIndex is the index of a character in the text.
4919      *
4920      * @return The visual position of this character.
4921      *
4922      * @throws IllegalStateException if this call is not preceded by a successful
4923      *         call to <code>setPara</code> or <code>setLine</code>
4924      * @throws IllegalArgumentException if <code>logicalIndex</code> is not in
4925      *         the range <code>0&lt;=logicalIndex&lt;getProcessedLength()</code>
4926      *
4927      * @see #getLogicalMap
4928      * @see #getLogicalIndex
4929      * @see #getProcessedLength
4930      * @see #MAP_NOWHERE
4931      * @see #OPTION_REMOVE_CONTROLS
4932      * @see #writeReordered
4933      */
getVisualIndex(int logicalIndex)4934     public int getVisualIndex(int logicalIndex)
4935     {
4936         verifyValidParaOrLine();
4937         verifyRange(logicalIndex, 0, length);
4938         return BidiLine.getVisualIndex(this, logicalIndex);
4939     }
4940 
4941 
4942     /**
4943      * Get the logical text position from a visual position.
4944      * If such a mapping is used many times on the same
4945      * <code>Bidi</code> object, then calling
4946      * <code>getVisualMap()</code> is more efficient.
4947      * <p>
4948      * The value returned may be <code>MAP_NOWHERE</code> if there is no
4949      * logical position because the corresponding text character is a Bidi
4950      * mark inserted in the output by option
4951      * <code>OPTION_INSERT_MARKS</code>.
4952      * <p>
4953      * This is the inverse method to <code>getVisualIndex()</code>.
4954      * <p>
4955      * When the visual output is altered by using options of
4956      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
4957      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
4958      * <code>REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
4959      * be correct. It is advised to use, when possible, reordering options
4960      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
4961      *
4962      * @param visualIndex is the visual position of a character.
4963      *
4964      * @return The index of this character in the text.
4965      *
4966      * @throws IllegalStateException if this call is not preceded by a successful
4967      *         call to <code>setPara</code> or <code>setLine</code>
4968      * @throws IllegalArgumentException if <code>visualIndex</code> is not in
4969      *         the range <code>0&lt;=visualIndex&lt;getResultLength()</code>
4970      *
4971      * @see #getVisualMap
4972      * @see #getVisualIndex
4973      * @see #getResultLength
4974      * @see #MAP_NOWHERE
4975      * @see #OPTION_INSERT_MARKS
4976      * @see #writeReordered
4977      */
getLogicalIndex(int visualIndex)4978     public int getLogicalIndex(int visualIndex)
4979     {
4980         verifyValidParaOrLine();
4981         verifyRange(visualIndex, 0, resultLength);
4982         /* we can do the trivial cases without the runs array */
4983         if (insertPoints.size == 0 && controlCount == 0) {
4984             if (direction == LTR) {
4985                 return visualIndex;
4986             }
4987             else if (direction == RTL) {
4988                 return length - visualIndex - 1;
4989             }
4990         }
4991         BidiLine.getRuns(this);
4992         return BidiLine.getLogicalIndex(this, visualIndex);
4993     }
4994 
4995     /**
4996      * Get a logical-to-visual index map (array) for the characters in the
4997      * <code>Bidi</code> (paragraph or line) object.
4998      * <p>
4999      * Some values in the map may be <code>MAP_NOWHERE</code> if the
5000      * corresponding text characters are Bidi controls removed from the visual
5001      * output by the option <code>OPTION_REMOVE_CONTROLS</code>.
5002      * <p>
5003      * When the visual output is altered by using options of
5004      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
5005      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
5006      * <code>REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
5007      * be correct. It is advised to use, when possible, reordering options
5008      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
5009      * <p>
5010      * Note that in right-to-left runs, this mapping places
5011      * second surrogates before first ones (which is generally a bad idea)
5012      * and combining characters before base characters.
5013      * Use of <code>{@link #writeReordered}</code>, optionally with the
5014      * <code>{@link #KEEP_BASE_COMBINING}</code> option can be considered instead
5015      * of using the mapping, in order to avoid these issues.
5016      *
5017      * @return an array of <code>getProcessedLength()</code>
5018      *        indexes which will reflect the reordering of the characters.<br><br>
5019      *        The index map will result in
5020      *        <code>indexMap[logicalIndex]==visualIndex</code>, where
5021      *        <code>indexMap</code> represents the returned array.
5022      *
5023      * @throws IllegalStateException if this call is not preceded by a successful
5024      *         call to <code>setPara</code> or <code>setLine</code>
5025      *
5026      * @see #getVisualMap
5027      * @see #getVisualIndex
5028      * @see #getProcessedLength
5029      * @see #MAP_NOWHERE
5030      * @see #OPTION_REMOVE_CONTROLS
5031      * @see #writeReordered
5032      */
getLogicalMap()5033     public int[] getLogicalMap()
5034     {
5035         /* countRuns() checks successful call to setPara/setLine */
5036         countRuns();
5037         if (length <= 0) {
5038             return new int[0];
5039         }
5040         return BidiLine.getLogicalMap(this);
5041     }
5042 
5043     /**
5044      * Get a visual-to-logical index map (array) for the characters in the
5045      * <code>Bidi</code> (paragraph or line) object.
5046      * <p>
5047      * Some values in the map may be <code>MAP_NOWHERE</code> if the
5048      * corresponding text characters are Bidi marks inserted in the visual
5049      * output by the option <code>OPTION_INSERT_MARKS</code>.
5050      * <p>
5051      * When the visual output is altered by using options of
5052      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
5053      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
5054      * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
5055      * be correct. It is advised to use, when possible, reordering options
5056      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
5057      *
5058      * @return an array of <code>getResultLength()</code>
5059      *        indexes which will reflect the reordering of the characters.<br><br>
5060      *        The index map will result in
5061      *        <code>indexMap[visualIndex]==logicalIndex</code>, where
5062      *        <code>indexMap</code> represents the returned array.
5063      *
5064      * @throws IllegalStateException if this call is not preceded by a successful
5065      *         call to <code>setPara</code> or <code>setLine</code>
5066      *
5067      * @see #getLogicalMap
5068      * @see #getLogicalIndex
5069      * @see #getResultLength
5070      * @see #MAP_NOWHERE
5071      * @see #OPTION_INSERT_MARKS
5072      * @see #writeReordered
5073      */
getVisualMap()5074     public int[] getVisualMap()
5075     {
5076         /* countRuns() checks successful call to setPara/setLine */
5077         countRuns();
5078         if (resultLength <= 0) {
5079             return new int[0];
5080         }
5081         return BidiLine.getVisualMap(this);
5082     }
5083 
5084     /**
5085      * This is a convenience method that does not use a <code>Bidi</code> object.
5086      * It is intended to be used for when an application has determined the levels
5087      * of objects (character sequences) and just needs to have them reordered (L2).
5088      * This is equivalent to using <code>getLogicalMap()</code> on a
5089      * <code>Bidi</code> object.
5090      *
5091      * @param levels is an array of levels that have been determined by
5092      *        the application.
5093      *
5094      * @return an array of <code>levels.length</code>
5095      *        indexes which will reflect the reordering of the characters.<p>
5096      *        The index map will result in
5097      *        <code>indexMap[logicalIndex]==visualIndex</code>, where
5098      *        <code>indexMap</code> represents the returned array.
5099      */
reorderLogical(byte[] levels)5100     public static int[] reorderLogical(byte[] levels)
5101     {
5102         return BidiLine.reorderLogical(levels);
5103     }
5104 
5105     /**
5106      * This is a convenience method that does not use a <code>Bidi</code> object.
5107      * It is intended to be used for when an application has determined the levels
5108      * of objects (character sequences) and just needs to have them reordered (L2).
5109      * This is equivalent to using <code>getVisualMap()</code> on a
5110      * <code>Bidi</code> object.
5111      *
5112      * @param levels is an array of levels that have been determined by
5113      *        the application.
5114      *
5115      * @return an array of <code>levels.length</code>
5116      *        indexes which will reflect the reordering of the characters.<p>
5117      *        The index map will result in
5118      *        <code>indexMap[visualIndex]==logicalIndex</code>, where
5119      *        <code>indexMap</code> represents the returned array.
5120      */
reorderVisual(byte[] levels)5121     public static int[] reorderVisual(byte[] levels)
5122     {
5123         return BidiLine.reorderVisual(levels);
5124     }
5125 
5126     /**
5127      * Invert an index map.
5128      * The index mapping of the argument map is inverted and returned as
5129      * an array of indexes that we will call the inverse map.
5130      *
5131      * @param srcMap is an array whose elements define the original mapping
5132      * from a source array to a destination array.
5133      * Some elements of the source array may have no mapping in the
5134      * destination array. In that case, their value will be
5135      * the special value <code>MAP_NOWHERE</code>.
5136      * All elements must be &gt;=0 or equal to <code>MAP_NOWHERE</code>.
5137      * Some elements in the source map may have a value greater than the
5138      * srcMap.length if the destination array has more elements than the
5139      * source array.
5140      * There must be no duplicate indexes (two or more elements with the
5141      * same value except <code>MAP_NOWHERE</code>).
5142      *
5143      * @return an array representing the inverse map.
5144      *         This array has a number of elements equal to 1 + the highest
5145      *         value in <code>srcMap</code>.
5146      *         For elements of the result array which have no matching elements
5147      *         in the source array, the corresponding elements in the inverse
5148      *         map will receive a value equal to <code>MAP_NOWHERE</code>.
5149      *         If element with index i in <code>srcMap</code> has a value k different
5150      *         from <code>MAP_NOWHERE</code>, this means that element i of
5151      *         the source array maps to element k in the destination array.
5152      *         The inverse map will have value i in its k-th element.
5153      *         For all elements of the destination array which do not map to
5154      *         an element in the source array, the corresponding element in the
5155      *         inverse map will have a value equal to <code>MAP_NOWHERE</code>.
5156      *
5157      * @see #MAP_NOWHERE
5158      */
invertMap(int[] srcMap)5159     public static int[] invertMap(int[] srcMap)
5160     {
5161         if (srcMap == null) {
5162             return null;
5163         } else {
5164             return BidiLine.invertMap(srcMap);
5165         }
5166     }
5167 
    /*
     * Fields and methods for compatibility with java.text.Bidi (Sun implementation)
     */

    /**
     * Constant indicating base direction is left-to-right.
     * Defined with the same value as <code>LTR</code>.
     */
    public static final int DIRECTION_LEFT_TO_RIGHT = LTR;

    /**
     * Constant indicating base direction is right-to-left.
     * Defined with the same value as <code>RTL</code>.
     */
    public static final int DIRECTION_RIGHT_TO_LEFT = RTL;

    /**
     * Constant indicating that the base direction depends on the first strong
     * directional character in the text according to the Unicode Bidirectional
     * Algorithm. If no strong directional character is present, the base
     * direction is left-to-right.
     * Defined with the same value as <code>LEVEL_DEFAULT_LTR</code>.
     */
    public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = LEVEL_DEFAULT_LTR;

    /**
     * Constant indicating that the base direction depends on the first strong
     * directional character in the text according to the Unicode Bidirectional
     * Algorithm. If no strong directional character is present, the base
     * direction is right-to-left.
     * Defined with the same value as <code>LEVEL_DEFAULT_RTL</code>.
     */
    public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = LEVEL_DEFAULT_RTL;
5197 
5198     /**
5199      * Create Bidi from the given paragraph of text and base direction.
5200      *
5201      * @param paragraph a paragraph of text
5202      * @param flags a collection of flags that control the algorithm. The
5203      *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
5204      *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
5205      *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
5206      * @see #DIRECTION_LEFT_TO_RIGHT
5207      * @see #DIRECTION_RIGHT_TO_LEFT
5208      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
5209      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
5210      */
Bidi(String paragraph, int flags)5211     public Bidi(String paragraph, int flags)
5212     {
5213         this(paragraph.toCharArray(), 0, null, 0, paragraph.length(), flags);
5214     }
5215 
5216     /**
5217      * Create Bidi from the given paragraph of text.<p>
5218      *
5219      * The RUN_DIRECTION attribute in the text, if present, determines the base
5220      * direction (left-to-right or right-to-left). If not present, the base
5221      * direction is computed using the Unicode Bidirectional Algorithm,
5222      * defaulting to left-to-right if there are no strong directional characters
5223      * in the text. This attribute, if present, must be applied to all the text
5224      * in the paragraph.<p>
5225      *
5226      * The BIDI_EMBEDDING attribute in the text, if present, represents
5227      * embedding level information.
5228      * Negative values indicate overrides at the absolute value of the level.
5229      * Positive values indicate embeddings. (See {@link #MAX_EXPLICIT_LEVEL}.)
5230      * Where values are zero or not defined, the base
5231      * embedding level as determined by the base direction is assumed.<p>
5232      *
5233      * The NUMERIC_SHAPING attribute in the text, if present, converts European
5234      * digits to other decimal digits before running the bidi algorithm. This
5235      * attribute, if present, must be applied to all the text in the paragraph.<p>
5236      *
5237      * Note: this constructor calls setPara() internally.
5238      *
5239      * @param paragraph a paragraph of text with optional character and
5240      *        paragraph attribute information
5241      */
Bidi(AttributedCharacterIterator paragraph)5242     public Bidi(AttributedCharacterIterator paragraph)
5243     {
5244         this();
5245         setPara(paragraph);
5246     }
5247 
5248     /**
5249      * Create Bidi from the given text, embedding, and direction information.
5250      *
5251      * <p>The embeddings array may be null. If present, the values represent
5252      * embedding level information.
5253      * Negative values indicate overrides at the absolute value of the level.
5254      * Positive values indicate embeddings. (See {@link #MAX_EXPLICIT_LEVEL}.)
5255      * Where values are zero, the base embedding level
5256      * as determined by the base direction is assumed,
5257      * except for paragraph separators which remain at 0 to prevent reordering of paragraphs.</p>
5258      *
5259      * <p>Note: This constructor calls setPara() internally,
5260      * after converting the java.text.Bidi-style embeddings with negative overrides
5261      * into ICU-style embeddings with bit fields for {@link #LEVEL_OVERRIDE} and the level.
5262      *
5263      * @param text an array containing the paragraph of text to process.
5264      * @param textStart the index into the text array of the start of the
5265      *        paragraph.
5266      * @param embeddings an array containing embedding values for each character
5267      *        in the paragraph. This can be null, in which case it is assumed
5268      *        that there is no external embedding information.
5269      * @param embStart the index into the embedding array of the start of the
5270      *        paragraph.
5271      * @param paragraphLength the length of the paragraph in the text and
5272      *        embeddings arrays.
5273      * @param flags a collection of flags that control the algorithm. The
5274      *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
5275      *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
5276      *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
5277      *
5278      * @throws IllegalArgumentException if the values in embeddings are
5279      *         not within the allowed range
5280      *
5281      * @see #DIRECTION_LEFT_TO_RIGHT
5282      * @see #DIRECTION_RIGHT_TO_LEFT
5283      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
5284      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
5285      */
Bidi(char[] text, int textStart, byte[] embeddings, int embStart, int paragraphLength, int flags)5286     public Bidi(char[] text,
5287             int textStart,
5288             byte[] embeddings,
5289             int embStart,
5290             int paragraphLength,
5291             int flags)
5292     {
5293         this();
5294         byte paraLvl;
5295         switch (flags) {
5296         case DIRECTION_LEFT_TO_RIGHT:
5297         default:
5298             paraLvl = LTR;
5299             break;
5300         case DIRECTION_RIGHT_TO_LEFT:
5301             paraLvl = RTL;
5302             break;
5303         case DIRECTION_DEFAULT_LEFT_TO_RIGHT:
5304             paraLvl = LEVEL_DEFAULT_LTR;
5305             break;
5306         case DIRECTION_DEFAULT_RIGHT_TO_LEFT:
5307             paraLvl = LEVEL_DEFAULT_RTL;
5308             break;
5309         }
5310         byte[] paraEmbeddings;
5311         if (embeddings == null) {
5312             paraEmbeddings = null;
5313         } else {
5314             // Convert from java.text.Bidi embeddings to ICU setPara() levels:
5315             // Copy to the start of a new array and convert java.text negative overrides
5316             // to ICU bit-field-and-mask overrides.
5317             // A copy of the embeddings is always required because
5318             // setPara() may modify its embeddings.
5319             paraEmbeddings = new byte[paragraphLength];
5320             byte lev;
5321             for (int i = 0; i < paragraphLength; i++) {
5322                 lev = embeddings[i + embStart];
5323                 if (lev < 0) {
5324                     lev = (byte)((- lev) | LEVEL_OVERRIDE);
5325                 }
5326                 // setPara() lifts level 0 up to the resolved paragraph level.
5327                 paraEmbeddings[i] = lev;
5328             }
5329         }
5330         if (textStart == 0 && paragraphLength == text.length) {
5331             setPara(text, paraLvl, paraEmbeddings);
5332         } else {
5333             char[] paraText = new char[paragraphLength];
5334             System.arraycopy(text, textStart, paraText, 0, paragraphLength);
5335             setPara(paraText, paraLvl, paraEmbeddings);
5336         }
5337     }
5338 
5339     /**
5340      * Create a Bidi object representing the bidi information on a line of text
5341      * within the paragraph represented by the current Bidi. This call is not
5342      * required if the entire paragraph fits on one line.
5343      *
5344      * @param lineStart the offset from the start of the paragraph to the start
5345      *        of the line.
5346      * @param lineLimit the offset from the start of the paragraph to the limit
5347      *        of the line.
5348      *
5349      * @throws IllegalStateException if this call is not preceded by a successful
5350      *         call to <code>setPara</code>
5351      * @throws IllegalArgumentException if lineStart and lineLimit are not in the range
5352      *         <code>0&lt;=lineStart&lt;lineLimit&lt;=getProcessedLength()</code>,
5353      *         or if the specified line crosses a paragraph boundary
5354      */
createLineBidi(int lineStart, int lineLimit)5355     public Bidi createLineBidi(int lineStart, int lineLimit)
5356     {
5357         return setLine(lineStart, lineLimit);
5358     }
5359 
5360     /**
5361      * Return true if the line is not left-to-right or right-to-left. This means
5362      * it either has mixed runs of left-to-right and right-to-left text, or the
5363      * base direction differs from the direction of the only run of text.
5364      *
5365      * @return true if the line is not left-to-right or right-to-left.
5366      *
5367      * @throws IllegalStateException if this call is not preceded by a successful
5368      *         call to <code>setPara</code>
5369      */
isMixed()5370     public boolean isMixed()
5371     {
5372         return (!isLeftToRight() && !isRightToLeft());
5373     }
5374 
5375     /**
5376      * Return true if the line is all left-to-right text and the base direction
5377      * is left-to-right.
5378      *
5379      * @return true if the line is all left-to-right text and the base direction
5380      *         is left-to-right.
5381      *
5382      * @throws IllegalStateException if this call is not preceded by a successful
5383      *         call to <code>setPara</code>
5384      */
isLeftToRight()5385     public boolean isLeftToRight()
5386     {
5387         return (getDirection() == LTR && (paraLevel & 1) == 0);
5388     }
5389 
5390     /**
5391      * Return true if the line is all right-to-left text, and the base direction
5392      * is right-to-left
5393      *
5394      * @return true if the line is all right-to-left text, and the base
5395      *         direction is right-to-left
5396      *
5397      * @throws IllegalStateException if this call is not preceded by a successful
5398      *         call to <code>setPara</code>
5399      */
isRightToLeft()5400     public boolean isRightToLeft()
5401     {
5402         return (getDirection() == RTL && (paraLevel & 1) == 1);
5403     }
5404 
5405     /**
5406      * Return true if the base direction is left-to-right
5407      *
5408      * @return true if the base direction is left-to-right
5409      *
5410      * @throws IllegalStateException if this call is not preceded by a successful
5411      *         call to <code>setPara</code> or <code>setLine</code>
5412      */
baseIsLeftToRight()5413     public boolean baseIsLeftToRight()
5414     {
5415         return (getParaLevel() == LTR);
5416     }
5417 
5418     /**
5419      * Return the base level (0 if left-to-right, 1 if right-to-left).
5420      *
5421      * @return the base level
5422      *
5423      * @throws IllegalStateException if this call is not preceded by a successful
5424      *         call to <code>setPara</code> or <code>setLine</code>
5425      */
getBaseLevel()5426     public int getBaseLevel()
5427     {
5428         return getParaLevel();
5429     }
5430 
5431     /**
5432      * Return the number of level runs.
5433      *
5434      * @return the number of level runs
5435      *
5436      * @throws IllegalStateException if this call is not preceded by a successful
5437      *         call to <code>setPara</code> or <code>setLine</code>
5438      */
getRunCount()5439     public int getRunCount()
5440     {
5441         return countRuns();
5442     }
5443 
5444     /**
5445      * Compute the logical to visual run mapping
5446      */
getLogicalToVisualRunsMap()5447      void getLogicalToVisualRunsMap()
5448      {
5449         if (isGoodLogicalToVisualRunsMap) {
5450             return;
5451         }
5452         int count = countRuns();
5453         if ((logicalToVisualRunsMap == null) ||
5454             (logicalToVisualRunsMap.length < count)) {
5455             logicalToVisualRunsMap = new int[count];
5456         }
5457         int i;
5458         long[] keys = new long[count];
5459         for (i = 0; i < count; i++) {
5460             keys[i] = ((long)(runs[i].start)<<32) + i;
5461         }
5462         Arrays.sort(keys);
5463         for (i = 0; i < count; i++) {
5464             logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF);
5465         }
5466         isGoodLogicalToVisualRunsMap = true;
5467      }
5468 
5469     /**
5470      * Return the level of the nth logical run in this line.
5471      *
5472      * @param run the index of the run, between 0 and <code>countRuns()-1</code>
5473      *
5474      * @return the level of the run
5475      *
5476      * @throws IllegalStateException if this call is not preceded by a successful
5477      *         call to <code>setPara</code> or <code>setLine</code>
5478      * @throws IllegalArgumentException if <code>run</code> is not in
5479      *         the range <code>0&lt;=run&lt;countRuns()</code>
5480      */
getRunLevel(int run)5481     public int getRunLevel(int run)
5482     {
5483         verifyValidParaOrLine();
5484         BidiLine.getRuns(this);
5485         verifyRange(run, 0, runCount);
5486         getLogicalToVisualRunsMap();
5487         return runs[logicalToVisualRunsMap[run]].level;
5488     }
5489 
5490     /**
5491      * Return the index of the character at the start of the nth logical run in
5492      * this line, as an offset from the start of the line.
5493      *
5494      * @param run the index of the run, between 0 and <code>countRuns()</code>
5495      *
5496      * @return the start of the run
5497      *
5498      * @throws IllegalStateException if this call is not preceded by a successful
5499      *         call to <code>setPara</code> or <code>setLine</code>
5500      * @throws IllegalArgumentException if <code>run</code> is not in
5501      *         the range <code>0&lt;=run&lt;countRuns()</code>
5502      */
getRunStart(int run)5503     public int getRunStart(int run)
5504     {
5505         verifyValidParaOrLine();
5506         BidiLine.getRuns(this);
5507         verifyRange(run, 0, runCount);
5508         getLogicalToVisualRunsMap();
5509         return runs[logicalToVisualRunsMap[run]].start;
5510     }
5511 
5512     /**
5513      * Return the index of the character past the end of the nth logical run in
5514      * this line, as an offset from the start of the line. For example, this
5515      * will return the length of the line for the last run on the line.
5516      *
5517      * @param run the index of the run, between 0 and <code>countRuns()</code>
5518      *
5519      * @return the limit of the run
5520      *
5521      * @throws IllegalStateException if this call is not preceded by a successful
5522      *         call to <code>setPara</code> or <code>setLine</code>
5523      * @throws IllegalArgumentException if <code>run</code> is not in
5524      *         the range <code>0&lt;=run&lt;countRuns()</code>
5525      */
getRunLimit(int run)5526     public int getRunLimit(int run)
5527     {
5528         verifyValidParaOrLine();
5529         BidiLine.getRuns(this);
5530         verifyRange(run, 0, runCount);
5531         getLogicalToVisualRunsMap();
5532         int idx = logicalToVisualRunsMap[run];
5533         int len = idx == 0 ? runs[idx].limit :
5534                                 runs[idx].limit - runs[idx-1].limit;
5535         return runs[idx].start + len;
5536     }
5537 
5538     /**
5539      * Return true if the specified text requires bidi analysis. If this returns
5540      * false, the text will display left-to-right. Clients can then avoid
5541      * constructing a Bidi object. Text in the Arabic Presentation Forms area of
5542      * Unicode is presumed to already be shaped and ordered for display, and so
5543      * will not cause this method to return true.
5544      *
5545      * @param text the text containing the characters to test
5546      * @param start the start of the range of characters to test
5547      * @param limit the limit of the range of characters to test
5548      *
5549      * @return true if the range of characters requires bidi analysis
5550      */
requiresBidi(char[] text, int start, int limit)5551     public static boolean requiresBidi(char[] text,
5552             int start,
5553             int limit)
5554     {
5555         final int RTLMask = (1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT |
5556                 1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC |
5557                 1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING |
5558                 1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE |
5559                 1 << UCharacter.DIRECTIONALITY_ARABIC_NUMBER);
5560 
5561         for (int i = start; i < limit; ++i) {
5562             if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {
5563                 return true;
5564             }
5565         }
5566         return false;
5567     }
5568 
5569     /**
5570      * Reorder the objects in the array into visual order based on their levels.
5571      * This is a utility method to use when you have a collection of objects
5572      * representing runs of text in logical order, each run containing text at a
5573      * single level. The elements at <code>index</code> from
5574      * <code>objectStart</code> up to <code>objectStart + count</code> in the
5575      * objects array will be reordered into visual order assuming
5576      * each run of text has the level indicated by the corresponding element in
5577      * the levels array (at <code>index - objectStart + levelStart</code>).
5578      *
5579      * @param levels an array representing the bidi level of each object
5580      * @param levelStart the start position in the levels array
5581      * @param objects the array of objects to be reordered into visual order
5582      * @param objectStart the start position in the objects array
5583      * @param count the number of objects to reorder
5584      */
reorderVisually(byte[] levels, int levelStart, Object[] objects, int objectStart, int count)5585     public static void reorderVisually(byte[] levels,
5586             int levelStart,
5587             Object[] objects,
5588             int objectStart,
5589             int count)
5590     {
5591         byte[] reorderLevels = new byte[count];
5592         System.arraycopy(levels, levelStart, reorderLevels, 0, count);
5593         int[] indexMap = reorderVisual(reorderLevels);
5594         Object[] temp = new Object[count];
5595         System.arraycopy(objects, objectStart, temp, 0, count);
5596         for (int i = 0; i < count; ++i) {
5597             objects[objectStart + i] = temp[indexMap[i]];
5598         }
5599     }
5600 
5601     /**
5602      * Take a <code>Bidi</code> object containing the reordering
5603      * information for a piece of text (one or more paragraphs) set by
5604      * <code>setPara()</code> or for a line of text set by <code>setLine()</code>
5605      * and return a string containing the reordered text.
5606      *
5607      * <p>The text may have been aliased (only a reference was stored
5608      * without copying the contents), thus it must not have been modified
5609      * since the <code>setPara()</code> call.
5610      *
5611      * This method preserves the integrity of characters with multiple
5612      * code units and (optionally) combining characters.
5613      * Characters in RTL runs can be replaced by mirror-image characters
5614      * in the returned string. Note that "real" mirroring has to be done in a
5615      * rendering engine by glyph selection and that for many "mirrored"
5616      * characters there are no Unicode characters as mirror-image equivalents.
5617      * There are also options to insert or remove Bidi control
5618      * characters; see the descriptions of the return value and the
5619      * <code>options</code> parameter, and of the option bit flags.
5620      *
5621      * @param options A bit set of options for the reordering that control
5622      *                how the reordered text is written.
5623      *                The options include mirroring the characters on a code
5624      *                point basis and inserting LRM characters, which is used
5625      *                especially for transforming visually stored text
5626      *                to logically stored text (although this is still an
5627      *                imperfect implementation of an "inverse Bidi" algorithm
5628      *                because it uses the "forward Bidi" algorithm at its core).
5629      *                The available options are:
5630      *                <code>DO_MIRRORING</code>,
5631      *                <code>INSERT_LRM_FOR_NUMERIC</code>,
5632      *                <code>KEEP_BASE_COMBINING</code>,
5633      *                <code>OUTPUT_REVERSE</code>,
5634      *                <code>REMOVE_BIDI_CONTROLS</code>,
5635      *                <code>STREAMING</code>
5636      *
5637      * @return The reordered text.
5638      *         If the <code>INSERT_LRM_FOR_NUMERIC</code> option is set, then
5639      *         the length of the returned string could be as large as
5640      *         <code>getLength()+2*countRuns()</code>.<br>
5641      *         If the <code>REMOVE_BIDI_CONTROLS</code> option is set, then the
5642      *         length of the returned string may be less than
5643      *         <code>getLength()</code>.<br>
5644      *         If none of these options is set, then the length of the returned
5645      *         string will be exactly <code>getProcessedLength()</code>.
5646      *
5647      * @throws IllegalStateException if this call is not preceded by a successful
5648      *         call to <code>setPara</code> or <code>setLine</code>
5649      *
5650      * @see #DO_MIRRORING
5651      * @see #INSERT_LRM_FOR_NUMERIC
5652      * @see #KEEP_BASE_COMBINING
5653      * @see #OUTPUT_REVERSE
5654      * @see #REMOVE_BIDI_CONTROLS
5655      * @see #OPTION_STREAMING
5656      * @see #getProcessedLength
5657      */
writeReordered(int options)5658     public String writeReordered(int options)
5659     {
5660         verifyValidParaOrLine();
5661         if (length == 0) {
5662             /* nothing to do */
5663             return "";
5664         }
5665         return BidiWriter.writeReordered(this, options);
5666     }
5667 
5668     /**
5669      * Reverse a Right-To-Left run of Unicode text.
5670      *
5671      * This method preserves the integrity of characters with multiple
5672      * code units and (optionally) combining characters.
5673      * Characters can be replaced by mirror-image characters
5674      * in the destination buffer. Note that "real" mirroring has
5675      * to be done in a rendering engine by glyph selection
5676      * and that for many "mirrored" characters there are no
5677      * Unicode characters as mirror-image equivalents.
5678      * There are also options to insert or remove Bidi control
5679      * characters.
5680      *
5681      * This method is the implementation for reversing RTL runs as part
5682      * of <code>writeReordered()</code>. For detailed descriptions
5683      * of the parameters, see there.
5684      * Since no Bidi controls are inserted here, the output string length
5685      * will never exceed <code>src.length()</code>.
5686      *
5687      * @see #writeReordered
5688      *
5689      * @param src The RTL run text.
5690      *
5691      * @param options A bit set of options for the reordering that control
5692      *                how the reordered text is written.
5693      *                See the <code>options</code> parameter in <code>writeReordered()</code>.
5694      *
5695      * @return The reordered text.
5696      *         If the <code>REMOVE_BIDI_CONTROLS</code> option
5697      *         is set, then the length of the returned string may be less than
5698      *         <code>src.length()</code>. If this option is not set,
5699      *         then the length of the returned string will be exactly
5700      *         <code>src.length()</code>.
5701      *
5702      * @throws IllegalArgumentException if <code>src</code> is null.
5703      */
writeReverse(String src, int options)5704     public static String writeReverse(String src, int options)
5705     {
5706         /* error checking */
5707         if (src == null) {
5708             throw new IllegalArgumentException();
5709         }
5710 
5711         if (src.length() > 0) {
5712             return BidiWriter.writeReverse(src, options);
5713         } else {
5714             /* nothing to do */
5715             return "";
5716         }
5717     }
5718 
5719 }
5720