/*
 ******************************************************************************
 * Copyright (C) 2004, International Business Machines Corporation and        *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
package org.unicode.cldr.tool;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.ArrayComparator;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Log;
import org.unicode.cldr.util.PathUtilities;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.XPathParts;

/**
 * Tool for applying modifications to the CLDR files. Use -h to see the options.
 *
 * <p>There are some environment variables that can be used with the program <br>
 * -DSHOW_FILES=&lt;anything&gt; shows all create/open of files.
 *
 * <p>What this class does: it scans a source directory tree for per-locale XML vetting files,
 * merges, for each locale, the values on which all submissions agree into a single delta file in a
 * target directory, and records the paths on which submissions disagree so they can be logged.
 *
 * <p>NOTE(review): this class has no {@code main} method, so the "-h" and SHOW_FILES remarks above
 * cannot be confirmed from this file; they may have been inherited from a sibling tool.
 */
public class VettingAdder {

    /** States shared by the small line-based state machines in {@link #fixXML}. */
    private static final int NOT_STARTED = 0;

    private static final int OPEN = 1;
    private static final int CLOSED = 2;

    /** Maps a locale name (file name minus ".xml") to the directories containing a file for it. */
    private final Map<String, Set<String>> locale_files = new TreeMap<>();

    private final Comparator<String> scomp = new UTF16.StringComparator();

    /**
     * Conflicts recorded by {@link #incorporateVetting}. Each entry is an array of {@code {locale,
     * path, Collection<VettingInfo>}}. Entries are ordered by locale and then by path only: the
     * third slot holds a collection, which a String comparator cannot compare.
     */
    private final Set<Object[]> conflictSet =
            new TreeSet<Object[]>(
                    new Comparator<Object[]>() {
                        @Override
                        public int compare(Object[] a, Object[] b) {
                            int result = scomp.compare((String) a[0], (String) b[0]);
                            if (result != 0) {
                                return result;
                            }
                            return scomp.compare((String) a[1], (String) b[1]);
                        }
                    });

    /** English file used for display names; set by {@link #showFiles}. */
    CLDRFile english;

    /**
     * Creates an adder and registers every vetting file found under the given path.
     *
     * @param sourceDirectory file or directory to scan (directories are scanned recursively)
     * @throws IOException if a directory cannot be listed
     */
    public VettingAdder(String sourceDirectory) throws IOException {
        addFiles(sourceDirectory);
    }

    /**
     * Registers the file at the given path, or recurses into it when it is a directory.
     *
     * <p>Files named "fixed-*" (the intermediate files written by {@link #incorporateVetting}) and
     * ".htaccess" are skipped silently; other files that do not end in ".xml" are logged and
     * skipped.
     *
     * @param sourceDirectory file or directory to process
     * @throws IOException if a directory cannot be listed
     */
    private void addFiles(String sourceDirectory) throws IOException {
        File f = new File(sourceDirectory);
        String normalizedPath = PathUtilities.getNormalizedPathString(f);
        if (f.isDirectory()) {
            String[] subnames = f.list();
            if (subnames == null) {
                // File.list() returns null rather than throwing when the listing fails.
                throw new IOException("Cannot list directory: " + normalizedPath);
            }
            for (String subname : subnames) {
                addFiles(normalizedPath + File.separatorChar + subname);
            }
            return;
        }
        String name = f.getName();
        if (name.startsWith("fixed-") || name.equals(".htaccess")) {
            return; // skip
        }
        if (!name.endsWith(".xml")) {
            Log.logln("Wrong filename format: " + normalizedPath);
            return;
        }
        String localeName = name.substring(0, name.length() - 4);
        Set<String> dirs = locale_files.get(localeName);
        if (dirs == null) {
            dirs = new TreeSet<>();
            locale_files.put(localeName, dirs);
        }
        dirs.add(f.getParent());
    }

    /** One submitted value: where it came from, its full path, and the value itself. */
    static class VettingInfo {
        private final String value;
        private final String fullPath;
        private final String dir;

        public VettingInfo(String dir, String fullPath, String value) {
            this.value = value;
            this.fullPath = fullPath;
            this.dir = dir;
        }

        @Override
        public String toString() {
            return "source: " + dir + ";\t value: <" + value + ">";
        }

        /**
         * Orders by full path, then by value; the source directory is ignored.
         *
         * @param other the info to compare against
         * @return negative, zero or positive, as for {@link Comparable#compareTo}
         */
        public int compareByPathAndValue(VettingInfo other) {
            int result = fullPath.compareTo(other.fullPath);
            return result != 0 ? result : value.compareTo(other.value);
        }
    }

    /** Treats two infos as equal when they have the same full path and value. */
    static Comparator<VettingInfo> PathAndValueComparator =
            new Comparator<VettingInfo>() {
                @Override
                public int compare(VettingInfo o1, VettingInfo o2) {
                    return o1.compareByPathAndValue(o2);
                }
            };

    /** Submitted values grouped by path, with paths kept in sorted order. */
    static class VettingInfoSet {
        private final Map<String, List<VettingInfo>> path_vettingInfoList = new TreeMap<>();

        /** Appends a new {@link VettingInfo} to the list kept for {@code path}. */
        public void add(String path, String dir, String fullPath, String value) {
            List<VettingInfo> infos = path_vettingInfoList.get(path);
            if (infos == null) {
                infos = new ArrayList<>(1);
                path_vettingInfoList.put(path, infos);
            }
            infos.add(new VettingInfo(dir, fullPath, value));
        }

        /** @return an iterator over the paths that have at least one value */
        public Iterator<String> iterator() {
            return path_vettingInfoList.keySet().iterator();
        }

        /** @return the values recorded for {@code path}, or null if there are none */
        public Collection<VettingInfo> get(String path) {
            return path_vettingInfoList.get(path);
        }
    }

    /** @return the locale names for which at least one vetting file was registered */
    public Set<String> keySet() {
        return locale_files.keySet();
    }

    /**
     * Merges all vetting files registered for a locale.
     *
     * <p>Each source file is first rewritten to "fixed-&lt;locale&gt;.xml" in its own directory
     * (see {@link #fixXML}) and then loaded. Paths for which every submission has the same full
     * path and value are added to a delta file, which is written to {@code targetDir} if it
     * received at least one value. All other paths are added to the conflict set.
     *
     * @param locale a locale name from {@link #keySet()}
     * @param targetDir directory for the merged file; it is concatenated directly with the file
     *     name in the log message
     * @throws IOException if reading or writing a file fails
     * @throws IllegalArgumentException if no vetting files are registered for {@code locale}
     */
    public void incorporateVetting(String locale, String targetDir) throws IOException {
        Set<String> dirs = locale_files.get(locale);
        if (dirs == null) {
            throw new IllegalArgumentException("No vetting files registered for: " + locale);
        }
        Log.logln("Vetting Data for: " + locale);
        VettingInfoSet accum = new VettingInfoSet();
        for (String sourceDir : dirs) {
            String dir = sourceDir + File.separator;
            String fixedLocale = "fixed-" + locale + ".xml";
            fixXML(dir, locale + ".xml", dir, fixedLocale);
            CLDRFile cldr =
                    SimpleFactory.makeFromFile(dir + fixedLocale, locale, DraftStatus.approved);
            for (Iterator<String> it = cldr.iterator(); it.hasNext(); ) {
                String path = it.next();
                String value = cldr.getStringValue(path);
                String fullPath = cldr.getFullXPath(path);
                // skip bogus values (missing, empty, or a path where a value should be)
                if (value == null || value.length() == 0 || value.startsWith("//ldml")) {
                    Log.logln("Skipping: [" + value + "] for " + fullPath);
                    continue;
                }
                accum.add(stripAlt(path), dir, stripAlt(fullPath), value);
            }
        }
        // now walk though items. If there is a single value, keep it
        // otherwise show
        Set<VettingInfo> uniquePathAndValue = new TreeSet<VettingInfo>(PathAndValueComparator);
        CLDRFile cldrDelta = SimpleFactory.makeFile(locale);
        boolean gotOne = false;
        for (Iterator<String> it = accum.iterator(); it.hasNext(); ) {
            String path = it.next();
            Collection<VettingInfo> submissions = accum.get(path);
            uniquePathAndValue.clear();
            uniquePathAndValue.addAll(submissions);
            if (uniquePathAndValue.size() == 1) { // no conflict
                VettingInfo vi = uniquePathAndValue.iterator().next();
                cldrDelta.add(vi.fullPath, vi.value);
                gotOne = true;
            } else { // there is a conflict
                conflictSet.add(new Object[] {locale, path, submissions});
            }
        }
        if (gotOne) {
            Log.logln("Writing: " + targetDir + locale + ".xml");
            try (PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, locale + ".xml")) {
                cldrDelta.write(pw);
            }
        } else {
            Log.logln("No data left in: " + targetDir + locale + ".xml");
        }
    }

    /** Logs every registered directory; the locale name is shown on its first line only. */
    public void showSources() {
        for (Map.Entry<String, Set<String>> entry : locale_files.entrySet()) {
            String label = entry.getKey();
            for (String dir : entry.getValue()) {
                Log.logln(label + " \t" + dir);
                label = "";
            }
        }
    }

    /**
     * Copies an XML file line by line, inserting wrapper elements inside localeDisplayNames.
     *
     * <p>While inside {@code <localeDisplayNames>}, the first line starting with {@code <language
     * } is preceded by {@code <languages>}, and the first later line that is not a language item
     * is preceded by {@code </languages>}. The same is done for scripts, territories, variants,
     * keys and types. Every input line is copied to the output unchanged.
     *
     * @param inputDir directory of the input file
     * @param inputFile name of the input file
     * @param outputDir directory of the output file
     * @param outputFile name of the output file
     * @throws IOException if opening or reading a file fails
     */
    public void fixXML(String inputDir, String inputFile, String outputDir, String outputFile)
            throws IOException {
        // try-with-resources closes both files even if reading fails part-way through.
        try (BufferedReader in = FileUtilities.openUTF8Reader(inputDir, inputFile);
                PrintWriter out = FileUtilities.openUTF8Writer(outputDir, outputFile)) {
            int haveLanguages = NOT_STARTED;
            int haveScripts = NOT_STARTED;
            int haveTerritories = NOT_STARTED;
            int haveVariants = NOT_STARTED;
            int haveKeys = NOT_STARTED;
            int haveTypes = NOT_STARTED;
            int inLocaleDisplayNames = NOT_STARTED;
            String line;
            while ((line = in.readLine()) != null) {
                String trimmed = line.trim();

                if (inLocaleDisplayNames == OPEN) {
                    haveLanguages = fixItem(out, haveLanguages, trimmed, "<language ", "languages");
                    haveScripts = fixItem(out, haveScripts, trimmed, "<script ", "scripts");
                    haveTerritories =
                            fixItem(out, haveTerritories, trimmed, "<territory ", "territories");
                    haveVariants = fixItem(out, haveVariants, trimmed, "<variant ", "variants");
                    haveKeys = fixItem(out, haveKeys, trimmed, "<key ", "keys");
                    haveTypes = fixItem(out, haveTypes, trimmed, "<type ", "types");
                }

                // The state changes only after the current line has been checked, so the
                // closing tag line is still seen by fixItem and closes any open wrapper.
                if (trimmed.startsWith("<localeDisplayNames")) {
                    inLocaleDisplayNames = OPEN;
                } else if (trimmed.startsWith("</localeDisplayNames")) {
                    inLocaleDisplayNames = CLOSED;
                }

                out.println(line);
            }
        }
    }

    /**
     * Advances the wrapper state for one kind of item and prints the wrapper tag when needed.
     *
     * @param out where the wrapper tags are printed
     * @param state current state: 0 = wrapper not yet opened, 1 = open, 2 = closed
     * @param trimmed the current input line, trimmed
     * @param item prefix that identifies an item line, e.g. {@code "<language "}
     * @param fix name of the wrapper element, e.g. {@code "languages"}
     * @return the new state; a wrapper that has been closed is never reopened
     */
    private static int fixItem(
            PrintWriter out, int state, String trimmed, String item, String fix) {
        if (trimmed.startsWith(item)) {
            if (state == NOT_STARTED) {
                out.println("<" + fix + ">");
                return OPEN;
            }
            return state;
        }
        if (state == OPEN) {
            out.println("</" + fix + ">");
            return CLOSED;
        }
        return state;
    }

    /**
     * @return Returns the conflictSet.
     */
    public Set<Object[]> getConflictSet() {
        return conflictSet;
    }

    /**
     * Runs the whole process and logs a four-part report: (A) the registered sources, (B) the
     * merge of every vetted locale into {@code targetDir}, (C) the conflicts found, and (D) for
     * each available locale without vetting files, the first path whose full xpath contains a
     * draft attribute.
     *
     * @param cldrFactory factory for the existing CLDR files
     * @param targetDir directory for the merged files
     * @throws IOException if reading or writing a file fails
     */
    public void showFiles(Factory cldrFactory, String targetDir) throws IOException {
        english = cldrFactory.make("en", true);

        Log.logln("");
        Log.logln("A. Sources");
        Log.logln("");
        showSources();

        Log.logln("");
        Log.logln("B. Intermediate Results");
        Log.logln("");
        Set<String> vettedLocales = keySet();
        for (String vettedLocale : vettedLocales) {
            incorporateVetting(vettedLocale, targetDir);
        }

        Log.logln("");
        Log.logln("C. Conflicts");
        Log.logln("");
        showConflicts(cldrFactory);

        Log.logln("");
        Log.logln("D. Missing Vetting");
        Log.logln("");

        Set<String> availableLocales = new TreeSet<>(cldrFactory.getAvailable());
        availableLocales.removeAll(vettedLocales);

        for (String locale : availableLocales) {
            CLDRFile cldr = cldrFactory.make(locale, false);
            for (Iterator<String> it = cldr.iterator(); it.hasNext(); ) {
                String fullPath = cldr.getFullXPath(it.next());
                if (fullPath.indexOf("[@draft=") >= 0) {
                    Log.logln(locale + " \t" + english.getName(locale) + "\texample: " + fullPath);
                    break; // one example per locale is enough
                }
            }
        }
    }

    /**
     * Logs every recorded conflict: the path, the current and English values, and each submitted
     * value with its source directory. After the conflicts of each locale, the collected
     * submitter names are logged by {@link #showSet}.
     *
     * @param cldrFactory factory used to load the current file of each locale
     */
    private void showConflicts(Factory cldrFactory) {
        String lastLocale = "";
        CLDRFile cldr = null;
        Transliterator any_latin = Transliterator.getInstance("any-latin");
        Set<String> emails = new LinkedHashSet<>();

        for (Object[] items : getConflictSet()) {
            String locale = (String) items[0];
            StringBuilder entry = new StringBuilder();
            // cldr == null covers a first entry whose locale equals the initial "".
            if (cldr == null || !lastLocale.equals(locale)) {
                showSet(emails);
                lastLocale = locale;
                cldr = cldrFactory.make(lastLocale, false);
                entry.append("==========")
                        .append(Utility.LINE_SEPARATOR)
                        .append(lastLocale)
                        .append(Utility.LINE_SEPARATOR);
            }
            String path = CLDRFile.getDistinguishingXPath((String) items[1], null);
            String current = cldr.getStringValue(path);
            entry.append("\tpath:\t")
                    .append(path)
                    .append(Utility.LINE_SEPARATOR)
                    .append("\tcurrent value:\t")
                    .append(getValue(any_latin, current))
                    .append(Utility.LINE_SEPARATOR);

            entry.append("\tEnglish value:\t")
                    .append(getValue(any_latin, english.getStringValue(path)))
                    .append(Utility.LINE_SEPARATOR);

            // Safe: incorporateVetting is the only writer and always stores this type here.
            @SuppressWarnings("unchecked")
            Collection<VettingInfo> submissions = (Collection<VettingInfo>) items[2];
            for (VettingInfo vi : submissions) {
                entry.append("\t\tvalue:\t")
                        .append(getValue(any_latin, vi.value))
                        .append("\t source: ")
                        .append(vi.dir)
                        .append(Utility.LINE_SEPARATOR);
                // get third field, that's the email
                emails.add(pathField(vi.dir, 2));
            }
            Log.logln(entry.toString());
        }
        showSet(emails);
    }

    /**
     * Returns one field of a directory string split on the platform separator.
     *
     * @param dir the directory string
     * @param index zero-based index of the wanted field
     * @return the field, or "" if {@code dir} has fewer fields
     */
    private static String pathField(String dir, int index) {
        int start = 0;
        for (int i = 0; i < index; ++i) {
            int separator = dir.indexOf(File.separatorChar, start);
            if (separator < 0) {
                return "";
            }
            start = separator + 1;
        }
        int end = dir.indexOf(File.separatorChar, start);
        return end < 0 ? dir.substring(start) : dir.substring(start, end);
    }

    /**
     * Logs the collected names as one comma-separated line ending in the CLDR list address, then
     * empties the set. Does nothing when the set is empty.
     *
     * @param emails the names to log; cleared on return
     */
    private void showSet(Set<String> emails) {
        if (emails.isEmpty()) {
            return;
        }
        StringBuilder result = new StringBuilder("Emails:\t");
        for (String email : emails) {
            result.append(email).append(", ");
        }
        result.append("cldr@unicode.org");
        emails.clear();
        Log.logln(result.toString());
    }

    /**
     * Formats a value for the log as {@code <value>}, followed by its transliteration in square
     * brackets when that differs from the value. A null value is shown as "NULL".
     *
     * @param some transliterator to apply
     * @param current the value, may be null
     * @return the formatted text
     */
    private String getValue(Transliterator some, String current) {
        if (current == null) {
            current = "NULL";
        }
        String other = some.transliterate(current);
        return "<" + current + ">" + (other.equals(current) ? "" : "\t[" + other + "]");
    }

    /**
     * Removes the {@code alt} attribute from the last element of a path when its value starts
     * with "proposed"; any other path is returned unchanged.
     *
     * <p>NOTE(review): uses XPathParts.cloneAsThawed() and removeAttribute(int, String), neither
     * of which is visible in this file; confirm both against the XPathParts version in use.
     *
     * @param path the xpath to process
     * @return the path without a proposed alt attribute
     */
    private String stripAlt(String path) {
        XPathParts frozenParts = XPathParts.getFrozenInstance(path);
        int lastElement = frozenParts.size() - 1;
        String alt = frozenParts.getAttributes(lastElement).get("alt");
        if (alt == null || !alt.startsWith("proposed")) {
            return path;
        }
        // The instance above is frozen, so the attribute is removed from a thawed copy.
        XPathParts editableParts = frozenParts.cloneAsThawed();
        editableParts.removeAttribute(lastElement, "alt");
        return editableParts.toString();
    }
}