package org.unicode.cldr.util;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
import com.ibm.icu.util.VersionInfo;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.unicode.cldr.tool.ToolConstants;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity.Status;

/**
 * Command-line tool that compares two versions of {@code supplemental/languageGroup.xml} and
 * prints, as tab-separated lines on standard output:
 *
 * <ul>
 *   <li>children that are listed under more than one parent ("Multiple parents"),
 *   <li>languages of interest that do not appear in a version at all ("Missing"),
 *   <li>codes present only in the old version ("Removed") or only in the new one ("Added"),
 *   <li>codes whose chain of ancestors differs between the two versions ("Moved FROM … TO …").
 * </ul>
 *
 * Every report is grouped by {@link LanguageStatus}, in the declaration order of that enum.
 */
public class DiffLanguageGroups {
    private static final Joiner JOIN_TAB = Joiner.on('\t');

    /** Separator placed between the successive ancestors of a printed parent chain. */
    private static final String IN = " ➡︎ ";

    static final CLDRConfig CONFIG = CLDRConfig.getInstance();
    static final SupplementalDataInfo SDI = CONFIG.getSupplementalDataInfo();
    static final CLDRFile ENGLISH = CONFIG.getEnglish();

    /**
     * Classification of a language code, used to group and order the output. Each constant
     * carries the two-letter abbreviation that is printed in the status column.
     */
    enum LanguageStatus {
        TC("TC"),
        OTHER_BASIC_PLUS("OB"),
        OTHER_CLDR("OC"),
        NON_CLDR("NC"),
        NON_REGULAR("XX");

        /** Abbreviation printed in the status column and explained in the "KEY" section. */
        public final String abbr;

        LanguageStatus(String s) {
            abbr = s;
        }

        /** Statuses for which {@code checkAgainstReference} prints no "Missing" line. */
        static final Set<LanguageStatus> SKIP_MISSING =
                Set.of(LanguageStatus.NON_CLDR, LanguageStatus.NON_REGULAR);
    }

    /** Language code to status; codes absent from this map are {@code NON_REGULAR}. */
    static final Map<String, LanguageStatus> LanguageToStatus;

    /** Inverse of {@link #LanguageToStatus}; each status maps to its sorted set of codes. */
    static final Map<LanguageStatus, Set<String>> StatusToLanguages;

    static {
        // add items to the map, most general first so the others can override
        Map<String, LanguageStatus> temp = new TreeMap<>();

        // Every language code with regular validity status, plus "mul".
        Sets.union(
                        Validity.getInstance()
                                .getStatusToCodes(LstrType.language)
                                .get(Status.regular),
                        Set.of("mul"))
                .forEach(code -> temp.put(code, LanguageStatus.NON_CLDR));

        // Locale IDs without an underscore (and not "root") that the CLDR factory provides.
        CONFIG.getCldrFactory()
                .getAvailableLanguages()
                .forEach(
                        locale -> {
                            if (!locale.contains("_") && !locale.equals("root")) {
                                temp.put(locale, LanguageStatus.OTHER_CLDR);
                            }
                        });

        // Underscore-free locales that have an entry in the calculated coverage levels.
        CalculatedCoverageLevels.getInstance()
                .getLevels()
                .keySet()
                .forEach(
                        locale -> {
                            if (!locale.contains("_")) {
                                temp.put(locale, LanguageStatus.OTHER_BASIC_PLUS);
                            }
                        });

        // Underscore-free coverage locales of Organization.cldr that are not also coverage
        // locales of Organization.special.
        Sets.difference(
                        StandardCodes.make().getLocaleCoverageLocales(Organization.cldr),
                        StandardCodes.make().getLocaleCoverageLocales(Organization.special))
                .forEach(
                        locale -> {
                            if (!locale.contains("_")) {
                                temp.put(locale, LanguageStatus.TC);
                            }
                        });
        LanguageToStatus = ImmutableMap.copyOf(temp);

        // Invert into status -> sorted codes; TreeMultimap orders the keys by enum order.
        Multimap<LanguageStatus, String> byStatus = TreeMultimap.create();
        LanguageToStatus.forEach((code, status) -> byStatus.put(status, code));
        Map<LanguageStatus, Set<String>> ordered = new LinkedHashMap<>();
        byStatus.asMap().forEach((status, codes) -> ordered.put(status, new TreeSet<>(codes)));
        StatusToLanguages = CldrUtility.protectCollection(ordered);
    }

    /**
     * Returns the status recorded for a language code.
     *
     * @param joint the language code to look up
     * @return the recorded status, or {@link LanguageStatus#NON_REGULAR} when the code is unknown
     */
    public static LanguageStatus getStatusForLanguage(String joint) {
        return CldrUtility.ifNull(LanguageToStatus.get(joint), LanguageStatus.NON_REGULAR);
    }

    /** Label printed for lines about the old version; assigned by {@link #main}. */
    static String OLD = "OLD";

    /** Label printed for lines about the new version; assigned by {@link #main}. */
    static String NEW = "NEW";

    /**
     * Runs the comparison and prints the report.
     *
     * @param args optional: {@code args[0]} is the old CLDR version and {@code args[1]} the new
     *     one, each in X.Y form and resolved against {@code CLDRPaths.ARCHIVE_DIRECTORY}. Without
     *     arguments the old version is {@code ToolConstants.LAST_RELEASE_VERSION_WITH0} and the
     *     new data is read from {@code CLDRPaths.COMMON_DIRECTORY}.
     */
    public static void main(String[] args) {
        System.out.println(
                "Args are OLD and NEW CLDR versions. Defaults: OLD = last release, NEW = current data. "
                        + "Format is X.Y, eg: "
                        + ToolConstants.LAST_RELEASE_VERSION_WITH0);
        System.out.println("\nReading old supplemental: may have unrelated errors.");
        final SupplementalDataInfo oldSupplementalInfo =
                SupplementalDataInfo.getInstance(
                        CldrUtility.getPath(CLDRPaths.LAST_COMMON_DIRECTORY, "supplemental/"));
        System.out.println();

        // null means "not given on the command line"
        final String oldArg = args.length > 0 ? args[0] : null;
        final String newArg = args.length > 1 ? args[1] : null;

        final String oldPath =
                archivePath(oldArg != null ? oldArg : ToolConstants.LAST_RELEASE_VERSION_WITH0);
        final String newPath =
                newArg == null
                        ? CLDRPaths.COMMON_DIRECTORY + "supplemental/languageGroup.xml"
                        : archivePath(newArg);

        // The labels must describe the files that are actually compared: follow the arguments
        // when they are given, and only otherwise fall back to the last-release/current versions.
        // NOTE(review): the prefixes differ in case ("v" vs "V"); kept as found - confirm intent.
        OLD = versionLabel("v", oldArg, oldSupplementalInfo.getCldrVersion());
        NEW = versionLabel("V", newArg, SDI.getCldrVersion());

        System.out.println("* KEY");
        for (LanguageStatus status : LanguageStatus.values()) {
            System.out.println("\t" + status.abbr + "\t" + status.toString());
        }
        System.out.println();

        // Load each version, reporting multiple-parent errors and missing languages as we go.
        final SortedMap<String, String> oldChildToParent = loadAndCheck(OLD, oldPath);
        final Set<String> oldSet = getAllKeysAndValues(oldChildToParent);

        final SortedMap<String, String> newChildToParent = loadAndCheck(NEW, newPath);
        final Set<String> newSet = getAllKeysAndValues(newChildToParent);

        // Show differences
        showOneSided(OLD, "Removed", Sets.difference(oldSet, newSet), oldChildToParent);
        showOneSided(NEW, "Added", Sets.difference(newSet, oldSet), newChildToParent);

        // Codes present in both versions whose chain of ancestors is not the same.
        Set<String> changed = new TreeSet<>();
        for (String joint : Sets.intersection(oldSet, newSet)) {
            List<String> oldChain = getChain(joint, oldChildToParent, new ArrayList<>());
            List<String> newChain = getChain(joint, newChildToParent, new ArrayList<>());
            if (!oldChain.equals(newChain)) {
                changed.add(joint);
            }
        }

        for (LanguageStatus status : LanguageStatus.values()) {
            for (String joint : changed) {
                if (getStatusForLanguage(joint) != status) {
                    continue;
                }
                System.out.println(
                        JOIN_TAB.join(
                                OLD + "-" + NEW,
                                status.abbr,
                                show(joint),
                                "Moved FROM",
                                showChain(joint, oldChildToParent),
                                "TO",
                                showChain(joint, newChildToParent)));
            }
        }
    }

    /** Returns the path of {@code languageGroup.xml} inside the archived release {@code version}. */
    private static String archivePath(String version) {
        return CLDRPaths.ARCHIVE_DIRECTORY
                + "cldr-"
                + version
                + "/common/supplemental/languageGroup.xml";
    }

    /**
     * Builds the label for one side of the comparison.
     *
     * @param prefix text placed before the version number
     * @param requested the version given on the command line, or null if none was given
     * @param fallback the version to show when {@code requested} is null
     * @return the prefix followed by the version formatted with 1 to 2 fields; if {@code
     *     requested} cannot be parsed as a version it is appended verbatim instead
     */
    private static String versionLabel(String prefix, String requested, VersionInfo fallback) {
        if (requested == null) {
            return prefix + fallback.getVersionString(1, 2);
        }
        try {
            return prefix + VersionInfo.getInstance(requested).getVersionString(1, 2);
        } catch (IllegalArgumentException notAVersionNumber) {
            // Still usable as a directory suffix; show exactly what the user typed.
            return prefix + requested;
        }
    }

    /**
     * Loads one language-group file, prints its multiple-parent errors (if any) and its "Missing"
     * lines, and returns the child-to-parent map.
     */
    private static SortedMap<String, String> loadAndCheck(String label, String path) {
        Multimap<String, String> errors = TreeMultimap.create();
        SortedMap<String, String> childToParent = invertToMap(loadLanguageGroups(path), errors);
        if (!errors.isEmpty()) {
            showErrors(label, errors);
        }
        Set<String> allCodes = getAllKeysAndValues(childToParent);
        for (Entry<LanguageStatus, Set<String>> entry : StatusToLanguages.entrySet()) {
            checkAgainstReference(label, entry.getKey(), entry.getValue(), allCodes);
        }
        return childToParent;
    }

    /**
     * Prints one line per code in {@code codes}, grouped by status, giving the code and its chain
     * of ancestors in {@code childToParent}.
     */
    private static void showOneSided(
            String label, String action, Set<String> codes, Map<String, String> childToParent) {
        for (LanguageStatus status : LanguageStatus.values()) {
            for (String joint : codes) {
                if (getStatusForLanguage(joint) != status) {
                    continue;
                }
                System.out.println(
                        formatMessage(label, joint, action, showChain(joint, childToParent)));
            }
        }
    }

    /** Formats the ancestors of {@code joint}, nearest first, separated by {@link #IN}. */
    private static String showChain(String joint, Map<String, String> childToParent) {
        return getChain(joint, childToParent, new ArrayList<>()).stream()
                .map(DiffLanguageGroups::show)
                .collect(Collectors.joining(IN));
    }

    /**
     * Prints a "Missing" line listing the languages of the given status that do not occur in the
     * reference set. Prints nothing for the statuses in {@link LanguageStatus#SKIP_MISSING} or
     * when nothing is missing.
     *
     * @param version label for the version column
     * @param languageStatus status shared by all of {@code cldrLanguages}
     * @param cldrLanguages the languages expected to be present
     * @param referenceSet every code that occurs in the language-group data being checked
     */
    private static void checkAgainstReference(
            String version,
            LanguageStatus languageStatus,
            Set<String> cldrLanguages,
            Set<String> referenceSet) {
        if (LanguageStatus.SKIP_MISSING.contains(languageStatus)) {
            return;
        }
        SetView<String> missing = Sets.difference(cldrLanguages, referenceSet);
        if (missing.isEmpty()) {
            return;
        }
        System.out.println(
                JOIN_TAB.join(
                        version,
                        languageStatus.abbr,
                        "…",
                        "Missing",
                        missing.stream()
                                .map(DiffLanguageGroups::show)
                                .collect(Collectors.joining(", "))));
    }

    /**
     * Prints a one-line summary (title, size, comma-separated members) of a non-empty set of
     * language codes; prints nothing for an empty set. Not called by {@link #main}.
     *
     * @param title text printed first on the line
     * @param oldMinusOther the codes to list
     */
    public static void showDiff(String title, Set<String> oldMinusOther) {
        if (oldMinusOther.isEmpty()) {
            return;
        }
        System.out.println(
                title
                        + "\t"
                        + oldMinusOther.size()
                        + ":\t"
                        + oldMinusOther.stream()
                                .map(DiffLanguageGroups::show)
                                .collect(Collectors.joining(", ")));
    }

    /**
     * Returns the display form of a language code: "Ω" for {@code mul}, otherwise the English
     * name followed by the code in ⁅…⁆.
     *
     * @param languageCode the code to display
     */
    public static String show(String languageCode) {
        if (languageCode.equals("mul")) {
            return "Ω";
        }
        return getName(languageCode) + " ⁅" + languageCode + "⁆";
    }

    /**
     * Returns the English name of a language code with any " (Other)" removed, or "(no name)"
     * when the English data has no name for it.
     *
     * @param languageCode the code to name
     */
    public static String getName(String languageCode) {
        String result = ENGLISH.getName(CLDRFile.LANGUAGE_NAME, languageCode);
        return result == null ? "(no name)" : result.replace(" (Other)", "");
    }

    /**
     * Prints one "Multiple parents" line per key of {@code oldErrors}, grouped by the key's
     * status.
     *
     * @param title label for the version column
     * @param oldErrors child code to the parents it was found under
     */
    public static void showErrors(String title, Multimap<String, String> oldErrors) {
        for (LanguageStatus status : LanguageStatus.values()) {
            for (Entry<String, Collection<String>> entry : oldErrors.asMap().entrySet()) {
                if (getStatusForLanguage(entry.getKey()) != status) {
                    continue;
                }
                System.out.println(
                        formatMessage(
                                title,
                                entry.getKey(),
                                "Multiple parents",
                                entry.getValue().stream()
                                        .map(DiffLanguageGroups::show)
                                        .collect(Collectors.joining(" "))));
            }
        }
    }

    /**
     * Appends the ancestors of {@code joint} to {@code result}, nearest parent first, and returns
     * {@code result}.
     *
     * <p>The walk stops at the first code without a parent. It also stops as soon as a parent
     * would repeat {@code joint} or an ancestor already visited, so cyclic data produces a finite
     * chain instead of unbounded recursion.
     *
     * @param joint the code whose ancestors are wanted
     * @param childToParent child code to parent code
     * @param result the list to append to
     * @return {@code result}
     */
    private static List<String> getChain(
            String joint, Map<String, String> childToParent, List<String> result) {
        Set<String> visited = new TreeSet<>();
        visited.add(joint);
        String parent = childToParent.get(joint);
        while (parent != null && visited.add(parent)) {
            result.add(parent);
            parent = childToParent.get(parent);
        }
        return result;
    }

    /**
     * Reads a language-group XML file into an immutable parent-to-children multimap.
     *
     * @param filename path of the file to read
     * @return parent code to the child codes listed for it
     */
    public static Multimap<String, String> loadLanguageGroups(String filename) {
        Multimap<String, String> parentToChildren = TreeMultimap.create();
        List<Pair<String, String>> pathValues =
                XMLFileReader.loadPathValues(
                        filename, new ArrayList<Pair<String, String>>(), false);
        for (Pair<String, String> item : pathValues) {
            handleLanguageGroups(
                    item.getSecond(),
                    XPathParts.getFrozenInstance(item.getFirst()),
                    parentToChildren);
        }
        return ImmutableSetMultimap.copyOf(parentToChildren);
    }

    /**
     * Inverts a parent-to-children multimap into an immutable child-to-parent map.
     *
     * <p>When a child occurs under several parents, the parent encountered last in the iteration
     * order of {@code oldParentToChildren.entries()} is kept, and the child together with the
     * parents involved is recorded in {@code childToParents}.
     *
     * @param oldParentToChildren parent code to child codes
     * @param childToParents output: receives every child that has more than one parent, mapped to
     *     those parents; must be mutable
     * @return child code to its (single retained) parent code
     */
    public static SortedMap<String, String> invertToMap(
            Multimap<String, String> oldParentToChildren, Multimap<String, String> childToParents) {
        TreeMap<String, String> childToParent = new TreeMap<>();
        for (Entry<String, String> parentAndChild : oldParentToChildren.entries()) {
            final String parent = parentAndChild.getKey();
            final String child = parentAndChild.getValue();
            final String previousParent = childToParent.put(child, parent);
            if (previousParent != null) {
                childToParents.put(child, previousParent);
                childToParents.put(child, parent);
            }
        }
        return ImmutableSortedMap.copyOf(childToParent);
    }

    /**
     * Returns every code that occurs in the map, whether as a key or as a value.
     *
     * @param newItems child code to parent code
     * @return an immutable set, iterating in sorted order, of all keys and values
     */
    public static Set<String> getAllKeysAndValues(Map<String, String> newItems) {
        Set<String> all = new TreeSet<>(newItems.values());
        all.addAll(newItems.keySet());
        return ImmutableSet.copyOf(all);
    }

    /**
     * Adds the children listed in one path/value pair to {@code languageGroups}, under the
     * {@code parent} attribute of the path's last element.
     *
     * @param value whitespace-separated child codes
     * @param parts the parsed path the value belongs to
     * @param languageGroups the parent-to-children multimap to add to
     * @return true if the path had a {@code parent} attribute and was processed, false if it was
     *     skipped because it had none
     */
    private static boolean handleLanguageGroups(
            String value, XPathParts parts, Multimap<String, String> languageGroups) {
        String parent = parts.getAttributeValue(-1, "parent");
        if (parent == null) {
            // Not a language-group element; a null key would be rejected by the sorted multimap.
            return false;
        }
        List<String> children = SupplementalDataInfo.WHITESPACE_SPLTTER.splitToList(value);
        languageGroups.putAll(parent, children);
        return true;
    }

    /**
     * Formats one tab-separated report line: version, status abbreviation of the language, the
     * displayed language, the issue, and the data.
     */
    static String formatMessage(String version, String language, String issue, String data) {
        return JOIN_TAB.join(
                version, getStatusForLanguage(language).abbr, show(language), issue, data);
    }
}