• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import com.google.common.collect.ImmutableSet;
4 import com.google.common.collect.Sets;
5 import com.google.common.collect.Sets.SetView;
6 import com.ibm.icu.text.Bidi;
7 import com.ibm.icu.text.UnicodeSet;
8 import com.ibm.icu.text.UnicodeSet.SpanCondition;
9 import java.util.ArrayList;
10 import java.util.Iterator;
11 import java.util.LinkedHashSet;
12 import java.util.List;
13 import java.util.Set;
14 import java.util.TreeSet;
15 import java.util.stream.Collectors;
16 
17 /**
18  * A set of utilities for handling BIDI, especially in charts and examples but not restricted to
19  * that.
20  */
21 public class BidiUtils {
22     public static final String ALERT = "⚠️";
23     static final String LRM = CodePointEscaper.LRM.getString();
24 
25     // These are intended to be classes of characters that "stick together in order"
26     // The initial focus is dates, so this will probably need to be expanded for numbers; might need
27     // more syntax
28 
29     private enum SpanClass {
30         NUMBERS("\\p{N}"),
31         LETTERS_MARKS("[\\p{L}\\p{M}]"),
32         DATE_PUNCT("[+]"),
33         SPACES("\\p{Z}"),
34         OTHERS("\\p{any}") // must be last, to pick up remainder.
35     ;
36         final UnicodeSet uset;
37 
SpanClass(String unicodeSetSource)38         private SpanClass(String unicodeSetSource) {
39             uset = new UnicodeSet(unicodeSetSource);
40         }
41 
42         static {
43             // clean up by removing previous values
44             UnicodeSet soFar = new UnicodeSet();
45             for (SpanClass sc : SpanClass.values()) {
46                 sc.uset.removeAll(soFar).freeze();
47                 soFar.addAll(sc.uset);
48             }
49         }
50     }
51     /**
52      * Checks the ordering of the example, under the specified bidiDirectionOptions;
53      *
54      * @param example Source text, not HTMLified
55      * @param outputReorderedResults One string for each specified bidiDirectionOption
56      * @param bidiDirectionOptions an array of BIDI directions from com.ibm.icu.text.Bidi. if there
57      *     are no items, the default is DIRECTION_DEFAULT_LEFT_TO_RIGHT (dir="auto"),
58      *     DIRECTION_RIGHT_TO_LEFT (dir="rtl").
59      * @return true unless two or more of the resulting strings are different.
60      */
isOrderingUnchanged( String example, List<String> outputReorderedResults, int... bidiDirectionOptions)61     public static boolean isOrderingUnchanged(
62             String example, List<String> outputReorderedResults, int... bidiDirectionOptions) {
63         boolean hasList = outputReorderedResults != null;
64         if (!hasList) {
65             outputReorderedResults = new ArrayList<>();
66         } else {
67             outputReorderedResults.clear();
68         }
69         boolean result = true;
70         for (int count = 0; count < bidiDirectionOptions.length; ++count) {
71             String reordered = new Bidi(example, bidiDirectionOptions[count]).writeReordered(0);
72             outputReorderedResults.add(reordered);
73             if (result && count != 0 && !reordered.equals(outputReorderedResults.get(0))) {
74                 result = false;
75                 if (!hasList) {
76                     break; // if the output results are not needed, then stop.
77                 }
78             }
79         }
80         return result;
81     }
82 
83     /**
84      * Return a list of the , where each span is a sequence of:
85      *
86      * @param orderedLTR
87      * @return
88      */
89     /**
90      * Gets the 'fields' in a formatted string, used to test whether bidi reordering causes the
91      * original fields to merge when reordered. Each field is the longest contiguous span of
92      * characters with the same properties: *
93      *
94      * <ul>
95      *   <li>numbers (\p{N})
96      *   <li>letters & marks ([\p{L}\p{M}
97      *   <li>Other
98      * </ul>
99      *
100      * @param ordered
101      * @return a set of fields, in the same order as found in the text but duplicates removed (ike
102      *     LinkedHashSeet).
103      */
getFields(String reordred, Set<String> result)104     public static Set<String> getFields(String reordred, Set<String> result) {
105         int start = 0;
106         while (start < reordred.length()) {
107             for (SpanClass sc : SpanClass.values()) {
108                 int end = sc.uset.span(reordred, start, SpanCondition.CONTAINED);
109                 if (end != start) {
110                     result.add(reordred.substring(start, end));
111                     start = end;
112                     break;
113                 }
114             }
115         }
116         return ImmutableSet.copyOf(result);
117     }
118 
119     /**
120      * Show when the fields in strings are different
121      *
122      * @param bidiReordereds
123      * @return
124      */
getAlert(List<String> bidiReordereds)125     public static String getAlert(List<String> bidiReordereds) {
126         Set<Set<String>> results = new LinkedHashSet<>();
127         for (String bidiReordered : bidiReordereds) {
128             Set<String> fieldsLTR = BidiUtils.getFields(bidiReordered, new TreeSet<>());
129             results.add(fieldsLTR);
130         }
131         if (results.size() < 2) {
132             return "";
133         }
134         // there can still be differences within a field of OTHERS, that we  ignore.
135         // EG ⚠️ 20,28,2B; 2B,28,20 " (+" vs " (+"
136 
137         // show just the difference in the first 2, for now.
138         Iterator<Set<String>> it = results.iterator();
139         Set<String> first = it.next();
140         Set<String> second = it.next();
141         SetView<String> uniqueFirst = Sets.difference(first, second);
142         SetView<String> uniqueSecond = Sets.difference(second, first);
143         return ALERT + " " + escape(uniqueFirst) + "; " + escape(uniqueSecond);
144     }
145 
escape(Set<String> uniqueFirst)146     public static String escape(Set<String> uniqueFirst) {
147         return uniqueFirst.stream()
148                 .map(x -> CodePointEscaper.toEscaped(x))
149                 .collect(Collectors.joining(LRM + ", " + LRM, LRM, LRM));
150     }
151 
alphagram(String string)152     public static String alphagram(String string) {
153         return string.codePoints()
154                 .sorted()
155                 .collect(
156                         StringBuilder::new, // Supplier<R> supplier
157                         StringBuilder::appendCodePoint, // ObjIntConsumer<R> accumulator
158                         StringBuilder::append // BiConsumer<R,​R> combiner
159                         )
160                 .toString();
161     }
162 }
163