• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 package java.text;
19 
20 import java.awt.font.NumericShaper;
21 import java.awt.font.TextAttribute;
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 
25 /**
26  * Implements the <a href="http://unicode.org/reports/tr9/">Unicode Bidirectional Algorithm</a>.
27  *
28  * <p>Use a {@code Bidi} object to get the information on the position reordering of a
29  * bidirectional text, such as Arabic or Hebrew. The natural display ordering of
30  * horizontal text in these languages is from right to left, while they order
31  * numbers from left to right.
32  *
33  * <p>If the text contains multiple runs, the information of each run can be
34  * obtained from the run index. The level of any particular run indicates the
35  * direction of the text as well as the nesting level. Left-to-right runs have
36  * even levels while right-to-left runs have odd levels.
37  */
38 public final class Bidi {
39     /**
40      * Constant that indicates the default base level. If there is no strong
41      * character, then set the paragraph level to 0 (left-to-right).
42      */
43     public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
44 
45     /**
46      * Constant that indicates the default base level. If there is no strong
47      * character, then set the paragraph level to 1 (right-to-left).
48      */
49     public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
50 
51     /**
52      * Constant that specifies the default base level as 0 (left-to-right).
53      */
54     public static final int DIRECTION_LEFT_TO_RIGHT = 0;
55 
56     /**
57      * Constant that specifies the default base level as 1 (right-to-left).
58      */
59     public static final int DIRECTION_RIGHT_TO_LEFT = 1;
60 
61     /**
62      * TODO: if we care about performance, we might just want to use an int[] instead of a Run[].
63      */
64     static class Run {
65         private final int start;
66         private final int limit;
67         private final int level;
68 
Run(int start, int limit, int level)69         public Run(int start, int limit, int level) {
70             this.start = start;
71             this.limit = limit;
72             this.level = level;
73         }
74 
getLevel()75         public int getLevel() {
76             return level;
77         }
78 
getLimit()79         public int getLimit() {
80             return limit;
81         }
82 
getStart()83         public int getStart() {
84             return start;
85         }
86     }
87 
88     /**
89      * Creates a {@code Bidi} object from the {@code
90      * AttributedCharacterIterator} of a paragraph text. The RUN_DIRECTION
91      * attribute determines the base direction of the bidirectional text. If it
92      * is not specified explicitly, the algorithm uses
93      * DIRECTION_DEFAULT_LEFT_TO_RIGHT by default. The BIDI_EMBEDDING attribute
94      * specifies the level of embedding for each character. Values between -1
95      * and -62 denote overrides at the level's absolute value, values from 1 to
96      * 62 indicate embeddings, and the 0 value indicates the level is calculated
97      * by the algorithm automatically. For the character with no BIDI_EMBEDDING
98      * attribute or with a improper attribute value, such as a {@code null}
99      * value, the algorithm treats its embedding level as 0. The NUMERIC_SHAPING
100      * attribute specifies the instance of NumericShaper used to convert
101      * European digits to other decimal digits before performing the bidi
102      * algorithm.
103      *
104      * @param paragraph
105      *            the String containing the paragraph text to perform the
106      *            algorithm.
107      * @throws IllegalArgumentException if {@code paragraph == null}
108      * @see java.awt.font.TextAttribute#BIDI_EMBEDDING
109      * @see java.awt.font.TextAttribute#NUMERIC_SHAPING
110      * @see java.awt.font.TextAttribute#RUN_DIRECTION
111      */
Bidi(AttributedCharacterIterator paragraph)112     public Bidi(AttributedCharacterIterator paragraph) {
113         if (paragraph == null) {
114             throw new IllegalArgumentException("paragraph is null");
115         }
116 
117         int begin = paragraph.getBeginIndex();
118         int end = paragraph.getEndIndex();
119         int length = end - begin;
120         char[] text = new char[length + 1]; // One more char for AttributedCharacterIterator.DONE
121 
122         if (length != 0) {
123             text[0] = paragraph.first();
124         } else {
125             paragraph.first();
126         }
127 
128         // First check the RUN_DIRECTION attribute.
129         int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
130         Object direction = paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
131         if (direction != null && direction instanceof Boolean) {
132             if (direction.equals(TextAttribute.RUN_DIRECTION_LTR)) {
133                 flags = DIRECTION_LEFT_TO_RIGHT;
134             } else {
135                 flags = DIRECTION_RIGHT_TO_LEFT;
136             }
137         }
138 
139         // Retrieve the text and gather BIDI_EMBEDDINGS
140         byte[] embeddings = null;
141         for (int textLimit = 1, i = 1; i < length; textLimit = paragraph
142                 .getRunLimit(TextAttribute.BIDI_EMBEDDING)
143                 - begin + 1) {
144             Object embedding = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
145             if (embedding != null && embedding instanceof Integer) {
146                 int embLevel = ((Integer) embedding).intValue();
147 
148                 if (embeddings == null) {
149                     embeddings = new byte[length];
150                 }
151 
152                 for (; i < textLimit; i++) {
153                     text[i] = paragraph.next();
154                     embeddings[i - 1] = (byte) embLevel;
155                 }
156             } else {
157                 for (; i < textLimit; i++) {
158                     text[i] = paragraph.next();
159                 }
160             }
161         }
162 
163         // Apply NumericShaper to the text
164         Object numericShaper = paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
165         if (numericShaper != null && numericShaper instanceof NumericShaper) {
166             ((NumericShaper) numericShaper).shape(text, 0, length);
167         }
168 
169         long bidi = 0;
170         try {
171             bidi = createUBiDi(text, 0, embeddings, 0, length, flags);
172             readBidiInfo(bidi);
173         } finally {
174             ubidi_close(bidi);
175         }
176     }
177 
178     /**
179      * Creates a {@code Bidi} object.
180      *
181      * @param text
182      *            the char array of the paragraph text that is processed.
183      * @param textStart
184      *            the index in {@code text} of the start of the paragraph.
185      * @param embeddings
186      *            the embedding level array of the paragraph text, specifying
187      *            the embedding level information for each character. Values
188      *            between -1 and -61 denote overrides at the level's absolute
189      *            value, values from 1 to 61 indicate embeddings, and the 0
190      *            value indicates the level is calculated by the algorithm
191      *            automatically.
192      * @param embStart
193      *            the index in {@code embeddings} of the start of the paragraph.
194      * @param paragraphLength
195      *            the length of the text to perform the algorithm.
196      * @param flags
197      *            indicates the base direction of the bidirectional text. It is
198      *            expected that this will be one of the direction constant
199      *            values defined in this class. An unknown value is treated as
200      *            DIRECTION_DEFAULT_LEFT_TO_RIGHT.
201      * @throws IllegalArgumentException
202      *             if {@code textStart}, {@code embStart}, or {@code
203      *             paragraphLength} is negative; if
204      *             {@code text.length < textStart + paragraphLength} or
205      *             {@code embeddings.length < embStart + paragraphLength}.
206      * @see #DIRECTION_LEFT_TO_RIGHT
207      * @see #DIRECTION_RIGHT_TO_LEFT
208      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
209      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
210      */
Bidi(char[] text, int textStart, byte[] embeddings, int embStart, int paragraphLength, int flags)211     public Bidi(char[] text, int textStart, byte[] embeddings, int embStart,
212             int paragraphLength, int flags) {
213 
214         if (text == null || text.length - textStart < paragraphLength) {
215             throw new IllegalArgumentException();
216         }
217 
218         if (embeddings != null) {
219             if (embeddings.length - embStart < paragraphLength) {
220                 throw new IllegalArgumentException();
221             }
222         }
223 
224         if (textStart < 0) {
225             throw new IllegalArgumentException("Negative textStart value " + textStart);
226         }
227         if (embStart < 0) {
228             throw new IllegalArgumentException("Negative embStart value " + embStart);
229         }
230         if (paragraphLength < 0) {
231             throw new IllegalArgumentException("Negative paragraph length " + paragraphLength);
232         }
233 
234         long bidi = 0;
235         try {
236             bidi = createUBiDi(text, textStart, embeddings, embStart, paragraphLength, flags);
237             readBidiInfo(bidi);
238         } finally {
239             ubidi_close(bidi);
240         }
241     }
242 
243     /**
244      * Creates a {@code Bidi} object.
245      *
246      * @param paragraph
247      *            the string containing the paragraph text to perform the
248      *            algorithm on.
249      * @param flags
250      *            indicates the base direction of the bidirectional text. It is
251      *            expected that this will be one of the direction constant
252      *            values defined in this class. An unknown value is treated as
253      *            DIRECTION_DEFAULT_LEFT_TO_RIGHT.
254      * @see #DIRECTION_LEFT_TO_RIGHT
255      * @see #DIRECTION_RIGHT_TO_LEFT
256      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
257      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
258      */
Bidi(String paragraph, int flags)259     public Bidi(String paragraph, int flags) {
260         this((paragraph == null ? null : paragraph.toCharArray()), 0, null, 0,
261                 (paragraph == null ? 0 : paragraph.length()), flags);
262     }
263 
264     // create the native UBiDi struct, need to be closed with ubidi_close().
createUBiDi(char[] text, int textStart, byte[] embeddings, int embStart, int paragraphLength, int flags)265     private static long createUBiDi(char[] text, int textStart,
266             byte[] embeddings, int embStart, int paragraphLength, int flags) {
267         char[] realText = null;
268 
269         byte[] realEmbeddings = null;
270 
271         if (text == null || text.length - textStart < paragraphLength) {
272             throw new IllegalArgumentException();
273         }
274         realText = new char[paragraphLength];
275         System.arraycopy(text, textStart, realText, 0, paragraphLength);
276 
277         if (embeddings != null) {
278             if (embeddings.length - embStart < paragraphLength) {
279                 throw new IllegalArgumentException();
280             }
281             if (paragraphLength > 0) {
282                 Bidi temp = new Bidi(text, textStart, null, 0, paragraphLength, flags);
283                 realEmbeddings = new byte[paragraphLength];
284                 System.arraycopy(temp.offsetLevel, 0, realEmbeddings, 0, paragraphLength);
285                 for (int i = 0; i < paragraphLength; i++) {
286                     byte e = embeddings[i];
287                     if (e < 0) {
288                         realEmbeddings[i] = (byte) (UBIDI_LEVEL_OVERRIDE - e);
289                     } else if (e > 0) {
290                         realEmbeddings[i] = e;
291                     } else {
292                         realEmbeddings[i] |= (byte) UBIDI_LEVEL_OVERRIDE;
293                     }
294                 }
295             }
296         }
297 
298         if (flags > 1 || flags < -2) {
299             flags = 0;
300         }
301 
302         long bidi = 0;
303         boolean needsDeletion = true;
304         try {
305             bidi = ubidi_open();
306             ubidi_setPara(bidi, realText, paragraphLength, flags, realEmbeddings);
307             needsDeletion = false;
308         } finally {
309             if (needsDeletion) {
310                 ubidi_close(bidi);
311             }
312         }
313         return bidi;
314     }
315 
316     /* private constructor used by createLineBidi() */
Bidi(long pBidi)317     private Bidi(long pBidi) {
318         readBidiInfo(pBidi);
319     }
320 
321     // read info from the native UBiDi struct
readBidiInfo(long pBidi)322     private void readBidiInfo(long pBidi) {
323         length = ubidi_getLength(pBidi);
324 
325         offsetLevel = (length == 0) ? null : ubidi_getLevels(pBidi);
326 
327         baseLevel = ubidi_getParaLevel(pBidi);
328 
329         int runCount = ubidi_countRuns(pBidi);
330         if (runCount == 0) {
331             unidirectional = true;
332             runs = null;
333         } else if (runCount < 0) {
334             runs = null;
335         } else {
336             runs = ubidi_getRuns(pBidi);
337 
338             // Simplified case for one run which has the base level
339             if (runCount == 1 && runs[0].getLevel() == baseLevel) {
340                 unidirectional = true;
341                 runs = null;
342             }
343         }
344 
345         direction = ubidi_getDirection(pBidi);
346     }
347 
    // Paragraph level as reported by ubidi_getParaLevel(); an even value is
    // treated as a left-to-right base direction (see baseIsLeftToRight()).
    private int baseLevel;

    // Text length as reported by ubidi_getLength(); forced to 0 for empty lines.
    private int length;

    // Levels indexed by character offset, from ubidi_getLevels(); null when
    // length is 0.
    private byte[] offsetLevel;

    // Runs from ubidi_getRuns(); null when the object is unidirectional or the
    // native side reported no usable run count.
    private Run[] runs;

    // Value of ubidi_getDirection(); compared against the UBiDiDirection_*
    // constants by isLeftToRight(), isRightToLeft() and isMixed().
    private int direction;

    // True when the text is handled as a single run at baseLevel; the run
    // accessors then answer from baseLevel/length instead of the runs array.
    private boolean unidirectional;
359 
360     /**
361      * Returns whether the base level is from left to right.
362      *
363      * @return true if the base level is from left to right.
364      */
baseIsLeftToRight()365     public boolean baseIsLeftToRight() {
366         return baseLevel % 2 == 0 ? true : false;
367     }
368 
369     /**
370      * Creates a new {@code Bidi} object containing the information of one line
371      * from this object.
372      *
373      * @param lineStart
374      *            the start offset of the line.
375      * @param lineLimit
376      *            the limit of the line.
377      * @return the new line Bidi object. In this new object, the indices will
378      *         range from 0 to (limit - start - 1).
379      * @throws IllegalArgumentException
380      *             if {@code lineStart < 0}, {@code lineLimit < 0}, {@code
381      *             lineStart > lineLimit} or if {@code lineStart} is greater
382      *             than the length of this object's paragraph text.
383      */
createLineBidi(int lineStart, int lineLimit)384     public Bidi createLineBidi(int lineStart, int lineLimit) {
385         if (lineStart < 0 || lineLimit < 0 || lineLimit > length || lineStart > lineLimit) {
386             throw new IllegalArgumentException("Invalid ranges (start=" + lineStart + ", " +
387                     "limit=" + lineLimit + ", length=" + length + ")");
388         }
389 
390         char[] text = new char[this.length];
391         Arrays.fill(text, 'a');
392         byte[] embeddings = new byte[this.length];
393         for (int i = 0; i < embeddings.length; i++) {
394             embeddings[i] = (byte) -this.offsetLevel[i];
395         }
396 
397         int dir = this.baseIsLeftToRight()
398                 ? Bidi.DIRECTION_LEFT_TO_RIGHT
399                 : Bidi.DIRECTION_RIGHT_TO_LEFT;
400         long parent = 0;
401         try {
402             parent = createUBiDi(text, 0, embeddings, 0, this.length, dir);
403             if (lineStart == lineLimit) {
404                 return createEmptyLineBidi(parent);
405             }
406             return new Bidi(ubidi_setLine(parent, lineStart, lineLimit));
407         } finally {
408             ubidi_close(parent);
409         }
410     }
411 
createEmptyLineBidi(long parent)412     private Bidi createEmptyLineBidi(long parent) {
413         // ICU4C doesn't allow this case, but the RI does.
414         Bidi result = new Bidi(parent);
415         result.length = 0;
416         result.offsetLevel = null;
417         result.runs = null;
418         result.unidirectional = true;
419         return result;
420     }
421 
422     /**
423      * Returns the base level.
424      *
425      * @return the base level.
426      */
getBaseLevel()427     public int getBaseLevel() {
428         return baseLevel;
429     }
430 
431     /**
432      * Returns the length of the text in the {@code Bidi} object.
433      *
434      * @return the length.
435      */
getLength()436     public int getLength() {
437         return length;
438     }
439 
440     /**
441      * Returns the level of a specified character.
442      *
443      * @param offset
444      *            the offset of the character.
445      * @return the level.
446      */
getLevelAt(int offset)447     public int getLevelAt(int offset) {
448         try {
449             return offsetLevel[offset] & ~UBIDI_LEVEL_OVERRIDE;
450         } catch (RuntimeException e) {
451             return baseLevel;
452         }
453     }
454 
455     /**
456      * Returns the number of runs in the bidirectional text.
457      *
458      * @return the number of runs, at least 1.
459      */
getRunCount()460     public int getRunCount() {
461         return unidirectional ? 1 : runs.length;
462     }
463 
464     /**
465      * Returns the level of the specified run.
466      *
467      * @param run
468      *            the index of the run.
469      * @return the level of the run.
470      */
getRunLevel(int run)471     public int getRunLevel(int run) {
472         return unidirectional ? baseLevel : runs[run].getLevel();
473     }
474 
475     /**
476      * Returns the limit offset of the specified run.
477      *
478      * @param run
479      *            the index of the run.
480      * @return the limit offset of the run.
481      */
getRunLimit(int run)482     public int getRunLimit(int run) {
483         return unidirectional ? length : runs[run].getLimit();
484     }
485 
486     /**
487      * Returns the start offset of the specified run.
488      *
489      * @param run
490      *            the index of the run.
491      * @return the start offset of the run.
492      */
getRunStart(int run)493     public int getRunStart(int run) {
494         return unidirectional ? 0 : runs[run].getStart();
495     }
496 
497     /**
498      * Indicates whether the text is from left to right, that is, both the base
499      * direction and the text direction is from left to right.
500      *
501      * @return {@code true} if the text is from left to right; {@code false}
502      *         otherwise.
503      */
isLeftToRight()504     public boolean isLeftToRight() {
505         return direction == UBiDiDirection_UBIDI_LTR;
506     }
507 
508     /**
509      * Indicates whether the text direction is mixed.
510      *
511      * @return {@code true} if the text direction is mixed; {@code false}
512      *         otherwise.
513      */
isMixed()514     public boolean isMixed() {
515         return direction == UBiDiDirection_UBIDI_MIXED;
516     }
517 
518     /**
519      * Indicates whether the text is from right to left, that is, both the base
520      * direction and the text direction is from right to left.
521      *
522      * @return {@code true} if the text is from right to left; {@code false}
523      *         otherwise.
524      */
isRightToLeft()525     public boolean isRightToLeft() {
526         return direction == UBiDiDirection_UBIDI_RTL;
527     }
528 
529     /**
530      * Reorders a range of objects according to their specified levels. This is
531      * a convenience function that does not use a {@code Bidi} object. The range
532      * of objects at {@code index} from {@code objectStart} to {@code
533      * objectStart + count} will be reordered according to the range of levels
534      * at {@code index} from {@code levelStart} to {@code levelStart + count}.
535      *
536      * @param levels
537      *            the level array, which is already determined.
538      * @param levelStart
539      *            the start offset of the range of the levels.
540      * @param objects
541      *            the object array to reorder.
542      * @param objectStart
543      *            the start offset of the range of objects.
544      * @param count
545      *            the count of the range of objects to reorder.
546      * @throws IllegalArgumentException
547      *             if {@code count}, {@code levelStart} or {@code objectStart}
548      *             is negative; if {@code count > levels.length - levelStart} or
549      *             if {@code count > objects.length - objectStart}.
550      */
reorderVisually(byte[] levels, int levelStart, Object[] objects, int objectStart, int count)551     public static void reorderVisually(byte[] levels, int levelStart,
552             Object[] objects, int objectStart, int count) {
553         if (count < 0 || levelStart < 0 || objectStart < 0
554                 || count > levels.length - levelStart
555                 || count > objects.length - objectStart) {
556             throw new IllegalArgumentException("Invalid ranges (levels=" + levels.length +
557                     ", levelStart=" + levelStart + ", objects=" + objects.length +
558                     ", objectStart=" + objectStart + ", count=" + count + ")");
559         }
560 
561         byte[] realLevels = new byte[count];
562         System.arraycopy(levels, levelStart, realLevels, 0, count);
563 
564         int[] indices = ubidi_reorderVisual(realLevels, count);
565 
566         ArrayList<Object> result = new ArrayList<Object>(count);
567         for (int i = 0; i < count; i++) {
568             result.add(objects[objectStart + indices[i]]);
569         }
570 
571         System.arraycopy(result.toArray(), 0, objects, objectStart, count);
572     }
573 
574     /**
575      * Indicates whether a range of characters of a text requires a {@code Bidi}
576      * object to display properly.
577      *
578      * @param text
579      *            the char array of the text.
580      * @param start
581      *            the start offset of the range of characters.
582      * @param limit
583      *            the limit offset of the range of characters.
584      * @return {@code true} if the range of characters requires a {@code Bidi}
585      *         object; {@code false} otherwise.
586      * @throws IllegalArgumentException
587      *             if {@code start} or {@code limit} is negative; {@code start >
588      *             limit} or {@code limit} is greater than the length of this
589      *             object's paragraph text.
590      */
requiresBidi(char[] text, int start, int limit)591     public static boolean requiresBidi(char[] text, int start, int limit) {
592         if (limit < 0 || start < 0 || start > limit || limit > text.length) {
593             throw new IllegalArgumentException();
594         }
595 
596         Bidi bidi = new Bidi(text, start, null, 0, limit - start, 0);
597         return !bidi.isLeftToRight();
598     }
599 
600     @Override
toString()601     public String toString() {
602         return getClass().getName()
603                 + "[direction: " + direction + " baseLevel: " + baseLevel
604                 + " length: " + length + " runs: " + Arrays.toString(runs) + "]";
605     }
606 
    // ICU4C constants.
    // Flag bit combined with a level to mark it as an override when levels are
    // passed to ubidi_setPara(); masked off again in getLevelAt().
    private static final int UBIDI_LEVEL_OVERRIDE = 0x80;
    // Values the result of ubidi_getDirection() is compared against.
    private static final int UBiDiDirection_UBIDI_LTR = 0;
    private static final int UBiDiDirection_UBIDI_RTL = 1;
    private static final int UBiDiDirection_UBIDI_MIXED = 2;

    // ICU4C functions.
    // All of these are implemented in native code; UBiDi objects are
    // represented on the Java side as long handles.
    // NOTE(review): presumably thin wrappers over the ICU4C functions of the
    // same name -- confirm ownership rules against the native sources.
    private static native long ubidi_open();
    private static native void ubidi_close(long pBiDi);
    private static native void ubidi_setPara(long pBiDi, char[] text, int length, int paraLevel, byte[] embeddingLevels);
    private static native long ubidi_setLine(final long pParaBiDi, int start, int limit);
    private static native int ubidi_getDirection(final long pBiDi);
    private static native int ubidi_getLength(final long pBiDi);
    private static native byte ubidi_getParaLevel(final long pBiDi);
    private static native byte[] ubidi_getLevels(long pBiDi);
    private static native int ubidi_countRuns(long pBiDi);
    private static native Bidi.Run[] ubidi_getRuns(long pBidi);
    private static native int[] ubidi_reorderVisual(byte[] levels, int length);
625 }
626