package org.unicode.cldr.util;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import com.ibm.icu.text.Bidi;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSet.SpanCondition;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;

/**
 * A set of utilities for handling BIDI, especially in charts and examples but not restricted to
 * that.
 */
public class BidiUtils {
    public static final String ALERT = "⚠️";
    static final String LRM = CodePointEscaper.LRM.getString();

    /**
     * Classes of characters that "stick together in order". The initial focus is dates, so this
     * will probably need to be expanded for numbers, and might need more syntax.
     *
     * <p>The constants are declared in priority order: after class initialization each constant's
     * set contains only the characters not already claimed by an earlier constant, so the sets are
     * pairwise disjoint and frozen.
     */
    private enum SpanClass {
        NUMBERS("\\p{N}"),
        LETTERS_MARKS("[\\p{L}\\p{M}]"),
        DATE_PUNCT("[+]"),
        SPACES("\\p{Z}"),
        OTHERS("\\p{any}"); // must be last, to pick up the remainder.

        final UnicodeSet uset;

        SpanClass(String unicodeSetSource) {
            uset = new UnicodeSet(unicodeSetSource);
        }

        static {
            // Make the sets disjoint: remove from each set everything claimed by earlier constants,
            // then freeze it so it can be shared safely.
            UnicodeSet soFar = new UnicodeSet();
            for (SpanClass sc : SpanClass.values()) {
                sc.uset.removeAll(soFar).freeze();
                soFar.addAll(sc.uset);
            }
        }
    }

    /**
     * Checks the ordering of the example, under the specified bidiDirectionOptions.
     *
     * @param example Source text, not HTMLified
     * @param outputReorderedResults if non-null, it is cleared and then filled with one reordered
     *     string for each bidi direction option, in the same order as the options. If null, the
     *     reordered strings are not returned and the check stops at the first difference.
     * @param bidiDirectionOptions an array of BIDI directions from com.ibm.icu.text.Bidi. If there
     *     are no items, the default is DIRECTION_DEFAULT_LEFT_TO_RIGHT (dir="auto"),
     *     DIRECTION_RIGHT_TO_LEFT (dir="rtl").
     * @return true unless two or more of the resulting strings are different.
     */
    public static boolean isOrderingUnchanged(
            String example, List<String> outputReorderedResults, int... bidiDirectionOptions) {
        // Apply the documented defaults when the caller supplies no directions.
        final int[] directions =
                (bidiDirectionOptions == null || bidiDirectionOptions.length == 0)
                        ? new int[] {
                            Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT, Bidi.DIRECTION_RIGHT_TO_LEFT
                        }
                        : bidiDirectionOptions;

        boolean hasList = outputReorderedResults != null;
        if (!hasList) {
            outputReorderedResults = new ArrayList<>();
        } else {
            outputReorderedResults.clear();
        }
        boolean result = true;
        for (int count = 0; count < directions.length; ++count) {
            String reordered = new Bidi(example, directions[count]).writeReordered(0);
            outputReorderedResults.add(reordered);
            // Every result is compared against the first one.
            if (result && count != 0 && !reordered.equals(outputReorderedResults.get(0))) {
                result = false;
                if (!hasList) {
                    break; // if the output results are not needed, then stop.
                }
            }
        }
        return result;
    }

    /**
     * Gets the 'fields' in a formatted string, used to test whether bidi reordering causes the
     * original fields to merge when reordered. Each field is the longest contiguous span of
     * characters belonging to the same class, where the classes are tried in this order:
     *
     * <ul>
     *   <li>numbers ({@code \p{N}})
     *   <li>letters &amp; marks ({@code [\p{L}\p{M}]})
     *   <li>date punctuation ({@code [+]})
     *   <li>spaces ({@code \p{Z}})
     *   <li>other (everything else)
     * </ul>
     *
     * @param reordred the text to split into fields
     * @param result a mutable set that the fields are added to (it is modified by this call). Its
     *     type determines duplicate handling and iteration order: for example, a LinkedHashSet
     *     keeps the fields in the order found in the text, while a TreeSet sorts them.
     * @return an immutable copy of {@code result}, in {@code result}'s iteration order, after the
     *     fields have been added
     */
    public static Set<String> getFields(String reordred, Set<String> result) {
        int start = 0;
        while (start < reordred.length()) {
            int end = start;
            for (SpanClass sc : SpanClass.values()) {
                end = sc.uset.span(reordred, start, SpanCondition.CONTAINED);
                if (end != start) {
                    result.add(reordred.substring(start, end));
                    break;
                }
            }
            if (end == start) {
                // Cannot happen while OTHERS covers the remainder of Unicode; fail fast instead
                // of looping forever if the span classes are ever changed.
                throw new IllegalStateException(
                        "No span class matches the character at index " + start);
            }
            start = end;
        }
        return ImmutableSet.copyOf(result);
    }

    /**
     * Shows when the fields in strings are different.
     *
     * @param bidiReordereds the reordered strings to compare, such as those produced by {@link
     *     #isOrderingUnchanged}
     * @return the empty string if all of the strings have the same set of fields; otherwise {@link
     *     #ALERT} followed by the escaped fields unique to the first field set and those unique to
     *     the second distinct field set, separated by "; "
     */
    public static String getAlert(List<String> bidiReordereds) {
        // Collect the distinct field sets; a TreeSet makes the comparison independent of the
        // order in which the fields occur in each string.
        Set<Set<String>> results = new LinkedHashSet<>();
        for (String bidiReordered : bidiReordereds) {
            Set<String> fieldsLTR = BidiUtils.getFields(bidiReordered, new TreeSet<>());
            results.add(fieldsLTR);
        }
        if (results.size() < 2) {
            return "";
        }
        // there can still be differences within a field of OTHERS, that we ignore.
        // EG ⚠️ 20,28,2B; 2B,28,20 " (+" vs " (+"

        // show just the difference in the first 2, for now.
        Iterator<Set<String>> it = results.iterator();
        Set<String> first = it.next();
        Set<String> second = it.next();
        SetView<String> uniqueFirst = Sets.difference(first, second);
        SetView<String> uniqueSecond = Sets.difference(second, first);
        return ALERT + " " + escape(uniqueFirst) + "; " + escape(uniqueSecond);
    }

    /**
     * Escapes each string with {@code CodePointEscaper.toEscaped} and joins the results with ", ".
     * An LRM is placed at the start, at the end, and on both sides of each separator.
     *
     * @param uniqueFirst the strings to escape and join
     * @return the joined, escaped string
     */
    public static String escape(Set<String> uniqueFirst) {
        return uniqueFirst.stream()
                .map(x -> CodePointEscaper.toEscaped(x))
                .collect(Collectors.joining(LRM + ", " + LRM, LRM, LRM));
    }

    /**
     * Returns the code points of the string sorted into ascending code point order.
     *
     * @param string the source text
     * @return a string with the same code points as the input, in sorted order
     */
    public static String alphagram(String string) {
        return string.codePoints()
                .sorted()
                .collect(
                        StringBuilder::new, // Supplier<R> supplier
                        StringBuilder::appendCodePoint, // ObjIntConsumer<R> accumulator
                        StringBuilder::append // BiConsumer<R,R> combiner
                        )
                .toString();
    }
}