• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.ImmutableSet;
5 import com.google.common.collect.ImmutableSetMultimap;
6 import com.google.common.collect.ImmutableSortedMap;
7 import com.google.common.collect.Multimap;
8 import com.google.common.collect.Sets;
9 import com.google.common.collect.Sets.SetView;
10 import com.google.common.collect.TreeMultimap;
11 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
12 import com.ibm.icu.util.VersionInfo;
13 import java.util.ArrayList;
14 import java.util.Collection;
15 import java.util.LinkedHashMap;
16 import java.util.List;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.SortedMap;
21 import java.util.TreeMap;
22 import java.util.TreeSet;
23 import java.util.stream.Collectors;
24 import org.unicode.cldr.tool.ToolConstants;
25 import org.unicode.cldr.util.StandardCodes.LstrType;
26 import org.unicode.cldr.util.Validity.Status;
27 
28 public class DiffLanguageGroups {
29     private static final Joiner JOIN_TAB = Joiner.on('\t'); // joins the columns of each report line with a tab
30     private static final String IN = " ➡︎ "; // separator printed between successive parents of a chain
31     static final CLDRConfig CONFIG = CLDRConfig.getInstance(); // shared CLDR configuration instance
32     static final SupplementalDataInfo SDI = CONFIG.getSupplementalDataInfo(); // supplemental data obtained from CONFIG
33     static final CLDRFile ENGLISH = CONFIG.getEnglish(); // English CLDR file; getName reads language names from it
34 
35     enum LanguageStatus {
36         TC("TC"),
37         OTHER_BASIC_PLUS("OB"),
38         OTHER_CLDR("OC"),
39         NON_CLDR("NC"),
40         NON_REGULAR("XX");
41 
42         public final String abbr;
43 
LanguageStatus(String s)44         private LanguageStatus(String s) {
45             abbr = s;
46         }
47 
48         static final Set<LanguageStatus> SKIP_MISSING =
49                 Set.of(LanguageStatus.NON_CLDR, LanguageStatus.NON_REGULAR);
50     }
51 
52     static final Map<String, LanguageStatus> LanguageToStatus;
53     static final Map<LanguageStatus, Set<String>> StatusToLanguages;
54 
55     static {
56         // add items to the map, most general first so the others can override
57         Map<String, LanguageStatus> temp = new TreeMap<>();
58 
59         Sets.union(
60                         Validity.getInstance()
61                                 .getStatusToCodes(LstrType.language)
62                                 .get(Status.regular),
63                         Set.of("mul"))
64                 .stream()
65                 .forEach(x -> temp.put(x, LanguageStatus.NON_CLDR));
66 
67         CONFIG.getCldrFactory().getAvailableLanguages().stream()
68                 .forEach(
69                         x -> {
70                             if (!x.contains("_") && !x.equals("root"))
71                                 temp.put(x, LanguageStatus.OTHER_CLDR);
72                         });
73 
74         CalculatedCoverageLevels.getInstance().getLevels().entrySet().stream()
75                 .forEach(
76                         x -> {
77                             if (!x.getKey().contains("_"))
78                                 temp.put(x.getKey(), LanguageStatus.OTHER_BASIC_PLUS);
79                         });
80 
81         Sets.difference(
82                         StandardCodes.make().getLocaleCoverageLocales(Organization.cldr),
83                         StandardCodes.make().getLocaleCoverageLocales(Organization.special))
84                 .stream()
85                 .forEach(
86                         x -> {
87                             if (!x.contains("_")) temp.put(x, LanguageStatus.TC);
88                         });
89         LanguageToStatus = ImmutableMap.copyOf(temp);
90 
91         Multimap<LanguageStatus, String> temp2 = TreeMultimap.create();
92         LanguageToStatus.entrySet().stream().forEach(x -> temp2.put(x.getValue(), x.getKey()));
93         Map<LanguageStatus, Set<String>> temp3 = new LinkedHashMap<>();
94         temp2.asMap().entrySet().forEach(x -> temp3.put(x.getKey(), new TreeSet<>(x.getValue())));
95         StatusToLanguages = CldrUtility.protectCollection(temp3);
96     }
97 
getStatusForLanguage(String joint)98     public static LanguageStatus getStatusForLanguage(String joint) {
99         return CldrUtility.ifNull(LanguageToStatus.get(joint), LanguageStatus.NON_REGULAR);
100     }
101 
102     static String OLD = "OLD"; // report label for the older data set; main replaces it with a "v"-prefixed version
103     static String NEW = "NEW"; // report label for the newer data set; main replaces it with a "V"-prefixed version
104 
main(String[] args)105     public static void main(String[] args) {
106         System.out.println(
107                 "Args are OLD and NEW CLDR versions. Defaults: OLD = last release, NEW = current data. Format is X.Y, eg: "
108                         + ToolConstants.LAST_RELEASE_VERSION_WITH0);
109         System.out.println("\nReading old supplemental: may have unrelated errors.");
110         final SupplementalDataInfo oldSupplementalInfo =
111                 SupplementalDataInfo.getInstance(
112                         CldrUtility.getPath(CLDRPaths.LAST_COMMON_DIRECTORY, "supplemental/"));
113         System.out.println();
114 
115         VersionInfo oldVersion = oldSupplementalInfo.getCldrVersion();
116 
117         String oldBase = ToolConstants.LAST_RELEASE_VERSION_WITH0;
118         String newBase = null;
119 
120         if (args.length > 0) {
121             oldBase = args[0];
122             if (args.length > 1) {
123                 newBase = args[1];
124             }
125         }
126         String oldPath =
127                 CLDRPaths.ARCHIVE_DIRECTORY
128                         + "cldr-"
129                         + oldBase
130                         + "/common/supplemental/languageGroup.xml";
131         String newPath =
132                 newBase == null
133                         ? CLDRPaths.COMMON_DIRECTORY + "supplemental/languageGroup.xml"
134                         : CLDRPaths.ARCHIVE_DIRECTORY
135                                 + "cldr-"
136                                 + newBase
137                                 + "/common/supplemental/languageGroup.xml";
138 
139         OLD = "v" + oldVersion.getVersionString(1, 2);
140         NEW = "V" + SDI.getCldrVersion().getVersionString(1, 2);
141 
142         System.out.println("* KEY");
143         for (LanguageStatus status : LanguageStatus.values()) {
144             System.out.println("\t" + status.abbr + "\t" + status.toString());
145         }
146         System.out.println();
147 
148         // Get OLD information
149 
150         Multimap<String, String> oldErrors = TreeMultimap.create();
151         SortedMap<String, String> oldChildToParent =
152                 invertToMap(loadLanguageGroups(oldPath), oldErrors);
153         if (!oldErrors.isEmpty()) {
154             showErrors(OLD, oldErrors);
155         }
156         Set<String> oldSet = getAllKeysAndValues(oldChildToParent);
157 
158         // Old info
159         for (Entry<LanguageStatus, Set<String>> entry : StatusToLanguages.entrySet()) {
160             checkAgainstReference(OLD, entry.getKey(), entry.getValue(), oldSet);
161         }
162 
163         // get NEW information
164 
165         Multimap<String, String> newErrors = TreeMultimap.create();
166         SortedMap<String, String> newChildToParent =
167                 invertToMap(loadLanguageGroups(newPath), newErrors);
168         if (!newErrors.isEmpty()) {
169             showErrors(NEW, newErrors);
170         }
171 
172         Set<String> newSet = getAllKeysAndValues(newChildToParent);
173         for (Entry<LanguageStatus, Set<String>> entry : StatusToLanguages.entrySet()) {
174             checkAgainstReference(NEW, entry.getKey(), entry.getValue(), newSet);
175         }
176 
177         // Show differences
178 
179         // showDiff("Δ Removing (" + OLD + "-" + NEW + ")", Sets.difference(oldSet, newSet));
180         for (LanguageStatus status : LanguageStatus.values()) {
181             for (String joint : Sets.difference(oldSet, newSet)) {
182                 if (getStatusForLanguage(joint) != status) {
183                     continue;
184                 }
185                 List<String> childToParent = getChain(joint, oldChildToParent, new ArrayList<>());
186                 System.out.println(
187                         JOIN_TAB.join(
188                                 OLD,
189                                 getStatusForLanguage(joint).abbr,
190                                 show(joint),
191                                 "Removed",
192                                 childToParent.stream()
193                                         .map(x -> show(x))
194                                         .collect(Collectors.joining(IN))));
195             }
196         }
197 
198         // showDiff("Δ Adding (" + NEW + "-" + OLD + ")", Sets.difference(newSet, oldSet));
199         for (LanguageStatus status : LanguageStatus.values()) {
200             for (String joint : Sets.difference(newSet, oldSet)) {
201                 if (getStatusForLanguage(joint) != status) {
202                     continue;
203                 }
204                 List<String> childToParent = getChain(joint, newChildToParent, new ArrayList<>());
205                 System.out.println(
206                         JOIN_TAB.join(
207                                 NEW,
208                                 getStatusForLanguage(joint).abbr,
209                                 show(joint),
210                                 "Added",
211                                 childToParent.stream()
212                                         .map(x -> show(x))
213                                         .collect(Collectors.joining(IN))));
214             }
215         }
216 
217         Set<String> changed = new TreeSet<>();
218         for (String joint : Sets.intersection(oldSet, newSet)) {
219             List<String> oldChain = getChain(joint, oldChildToParent, new ArrayList<>());
220             List<String> newChain = getChain(joint, newChildToParent, new ArrayList<>());
221             if (!oldChain.equals(newChain)) {
222                 changed.add(joint);
223             }
224         }
225         // showDiff("Δ Moving (" + OLD + " to " + NEW + ")", changed);
226 
227         for (LanguageStatus status : LanguageStatus.values()) {
228             for (String joint : changed) {
229                 if (getStatusForLanguage(joint) != status) {
230                     continue;
231                 }
232                 List<String> oldChain = getChain(joint, oldChildToParent, new ArrayList<>());
233                 List<String> newChain = getChain(joint, newChildToParent, new ArrayList<>());
234                 System.out.println(
235                         JOIN_TAB.join(
236                                 OLD + "-" + NEW,
237                                 getStatusForLanguage(joint).abbr,
238                                 show(joint),
239                                 "Moved FROM",
240                                 oldChain.stream().map(x -> show(x)).collect(Collectors.joining(IN)),
241                                 "TO",
242                                 newChain.stream()
243                                         .map(x -> show(x))
244                                         .collect(Collectors.joining(IN))));
245             }
246         }
247     }
248 
checkAgainstReference( String version, LanguageStatus languageStatus, Set<String> cldrLanguages, Set<String> oldSet)249     private static void checkAgainstReference(
250             String version,
251             LanguageStatus languageStatus,
252             Set<String> cldrLanguages,
253             Set<String> oldSet) {
254         if (LanguageStatus.SKIP_MISSING.contains(languageStatus)) {
255             return;
256         }
257         SetView<String> missing = Sets.difference(cldrLanguages, oldSet);
258         if (!missing.isEmpty()) {
259             System.out.println(
260                     JOIN_TAB.join(
261                             version,
262                             languageStatus.abbr,
263                             "…",
264                             "Missing",
265                             missing.stream().map(x -> show(x)).collect(Collectors.joining(", "))));
266         }
267     }
268 
showDiff(String title, Set<String> oldMinusOther)269     public static void showDiff(String title, Set<String> oldMinusOther) {
270         if (!oldMinusOther.isEmpty()) {
271             System.out.println(
272                     title
273                             + "\t"
274                             + oldMinusOther.size()
275                             + ":\t"
276                             + oldMinusOther.stream()
277                                     .map(x -> show(x))
278                                     .collect(Collectors.joining(", ")));
279         }
280     }
281 
show(String languageCode)282     public static String show(String languageCode) {
283         return languageCode.equals("mul") ? "Ω" : getName(languageCode) + " ⁅" + languageCode + "⁆";
284     }
285 
getName(String languageCode)286     public static String getName(String languageCode) {
287         String result = ENGLISH.getName(CLDRFile.LANGUAGE_NAME, languageCode);
288         return result == null ? "(no name)" : result.replace(" (Other)", "");
289     }
290 
showErrors(String title, Multimap<String, String> oldErrors)291     public static void showErrors(String title, Multimap<String, String> oldErrors) {
292         for (LanguageStatus status : LanguageStatus.values()) {
293             for (Entry<String, Collection<String>> entry : oldErrors.asMap().entrySet()) {
294                 if (getStatusForLanguage(entry.getKey()) != status) {
295                     continue;
296                 }
297                 System.out.println(
298                         formatMessage(
299                                 title,
300                                 entry.getKey(),
301                                 "Multiple parents",
302                                 entry.getValue().stream()
303                                         .map(x -> show(x))
304                                         .collect(Collectors.joining(" ���� "))));
305             }
306         }
307     }
308 
getChain( String joint, Map<String, String> childToParent, List<String> result)309     private static List<String> getChain(
310             String joint, Map<String, String> childToParent, List<String> result) {
311         String parent = childToParent.get(joint);
312         if (parent == null) {
313             return result;
314         }
315         result.add(parent);
316         return getChain(parent, childToParent, result);
317     }
318 
loadLanguageGroups(String filename)319     public static Multimap<String, String> loadLanguageGroups(String filename) {
320         Multimap<String, String> newParentToChildren = TreeMultimap.create();
321 
322         for (Pair<String, String> item :
323                 XMLFileReader.loadPathValues(
324                         filename, new ArrayList<Pair<String, String>>(), false)) {
325             handleLanguageGroups(
326                     item.getSecond(),
327                     XPathParts.getFrozenInstance(item.getFirst()),
328                     newParentToChildren);
329         }
330         newParentToChildren = ImmutableSetMultimap.copyOf(newParentToChildren);
331         return newParentToChildren;
332     }
333 
invertToMap( Multimap<String, String> oldParentToChildren, Multimap<String, String> childToParents)334     public static SortedMap<String, String> invertToMap(
335             Multimap<String, String> oldParentToChildren, Multimap<String, String> childToParents) {
336         TreeMap<String, String> childToParent = new TreeMap<>();
337         for (Entry<String, String> parentToChildren : oldParentToChildren.entries()) {
338             final String parent = parentToChildren.getKey();
339             final String child = parentToChildren.getValue();
340             String old = childToParent.put(child, parent);
341             if (old != null) {
342                 childToParents.put(child, old);
343                 childToParents.put(child, parent);
344             }
345         }
346         return ImmutableSortedMap.copyOf(childToParent);
347     }
348 
getAllKeysAndValues(Map<String, String> newItems)349     public static Set<String> getAllKeysAndValues(Map<String, String> newItems) {
350         Set<String> newSet = new TreeSet<>(newItems.values());
351         newSet.addAll(newItems.keySet());
352         return ImmutableSet.copyOf(newSet);
353     }
354 
handleLanguageGroups( String value, XPathParts parts, Multimap<String, String> languageGroups)355     private static boolean handleLanguageGroups(
356             String value, XPathParts parts, Multimap<String, String> languageGroups) {
357         String parent = parts.getAttributeValue(-1, "parent");
358         List<String> children = SupplementalDataInfo.WHITESPACE_SPLTTER.splitToList(value);
359         languageGroups.putAll(parent, children);
360         return true;
361     }
362 
formatMessage(String version, String language, String issue, String data)363     static String formatMessage(String version, String language, String issue, String data) {
364         return JOIN_TAB.join(
365                 version, getStatusForLanguage(language).abbr, show(language), issue, data);
366     }
367 }
368