/*
 ******************************************************************************
 * Copyright (C) 2004, International Business Machines Corporation and        *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
package org.unicode.cldr.tool;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.ArrayComparator;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Log;
import org.unicode.cldr.util.PathUtilities;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;

/**
 * Collects vetting submissions (files named {@code <locale>.xml}, found by walking a source
 * directory tree), merges the values on which all submissions for a locale agree into one file
 * per locale, and records the paths on which submissions disagree so that they can be reported.
 * <p>
 * The class has no command-line entry point of its own; it is driven through
 * {@link #showFiles(Factory, String)} or through the individual public methods.
 * <p>
 * There are some environment variables that can be used with the program <br>
 * {@code -DSHOW_FILES=<anything>} shows all create/open of files. The property is not read in
 * this class; presumably it is honored by the file helpers that are called here (to be confirmed).
 */
public class VettingAdder {

    /** State of a wrapper element in {@link #fixXML}: no item of the group has been seen yet. */
    private static final int NOT_STARTED = 0;
    /** State of a wrapper element in {@link #fixXML}: the opening wrapper tag has been written. */
    private static final int OPEN = 1;
    /** State of a wrapper element in {@link #fixXML}: the closing wrapper tag has been written. */
    private static final int CLOSED = 2;

    /** Locale ID mapped to the directories that contain a {@code <locale>.xml} file. */
    private final Map<String, Set<String>> localeFiles = new TreeMap<>();

    private final Comparator<String> scomp = new UTF16.StringComparator();

    /**
     * Conflicts found so far. Each entry is {@code { locale (String), path (String),
     * submissions (Collection<VettingInfo>) }}.
     * <p>
     * Entries are ordered by locale, then by path. The third slot is deliberately not compared:
     * it holds a collection, and handing it to a String comparator fails with a
     * ClassCastException as soon as that slot is consulted.
     */
    private final Set<Object[]> conflictSet = new TreeSet<Object[]>(new Comparator<Object[]>() {
        @Override
        public int compare(Object[] a, Object[] b) {
            int result = scomp.compare((String) a[0], (String) b[0]);
            return result != 0 ? result : scomp.compare((String) a[1], (String) b[1]);
        }
    });

    /** English data used for display names and values in the report; set by {@link #showFiles}. */
    CLDRFile english;

    /**
     * Creates an adder and registers every vetting file found under the given location.
     *
     * @param sourceDirectory a directory to walk recursively, or a single file
     * @throws IOException declared for callers; propagated from the helpers used while scanning
     */
    public VettingAdder(String sourceDirectory) throws IOException {
        addFiles(sourceDirectory);
    }

    /**
     * Registers the file at the given location, or recurses into it when it is a directory.
     * Files whose name starts with {@code fixed-} and files named {@code .htaccess} are ignored
     * silently; any other file that does not end in {@code .xml} is logged and ignored.
     */
    private void addFiles(String sourceDirectory) throws IOException {
        File f = new File(sourceDirectory);
        String normalizedPath = PathUtilities.getNormalizedPathString(f);

        if (f.isDirectory()) {
            String[] subnames = f.list();
            if (subnames == null) {
                // File.list() returns null when the directory cannot be read.
                Log.logln("Cannot list directory: " + normalizedPath);
                return;
            }
            for (String subname : subnames) {
                addFiles(normalizedPath + File.separatorChar + subname);
            }
            return;
        }

        String name = f.getName();
        if (name.startsWith("fixed-") || name.equals(".htaccess")) {
            return; // skip
        }
        if (!name.endsWith(".xml")) {
            Log.logln("Wrong filename format: " + normalizedPath);
            return;
        }
        String localeName = name.substring(0, name.length() - ".xml".length());
        Set<String> dirs = localeFiles.get(localeName);
        if (dirs == null) {
            dirs = new TreeSet<>();
            localeFiles.put(localeName, dirs);
        }
        dirs.add(f.getParent());
    }

    /** One submitted value: where it came from, its full path, and the value itself. */
    static class VettingInfo {
        private final String value;
        private final String fullPath;
        private final String dir;

        public VettingInfo(String dir, String fullPath, String value) {
            this.value = value;
            this.fullPath = fullPath;
            this.dir = dir;
        }

        @Override
        public String toString() {
            return "source: " + dir + ";\t value: <" + value + ">";
        }

        /**
         * Orders by full path, then by value. The source directory is ignored, so two
         * submissions of the same value for the same path compare as equal.
         */
        public int compareByPathAndValue(VettingInfo other) {
            int result = fullPath.compareTo(other.fullPath);
            return result != 0 ? result : value.compareTo(other.value);
        }
    }

    /** Comparator form of {@link VettingInfo#compareByPathAndValue(VettingInfo)}. */
    static Comparator<VettingInfo> PathAndValueComparator = new Comparator<VettingInfo>() {
        @Override
        public int compare(VettingInfo o1, VettingInfo o2) {
            return o1.compareByPathAndValue(o2);
        }
    };

    /** Submissions grouped by path, with the paths kept in sorted order. */
    static class VettingInfoSet {
        private final Map<String, List<VettingInfo>> pathToVettingInfo = new TreeMap<>();

        public void add(String path, String dir, String fullPath, String value) {
            List<VettingInfo> infos = pathToVettingInfo.get(path);
            if (infos == null) {
                infos = new ArrayList<>(1);
                pathToVettingInfo.put(path, infos);
            }
            infos.add(new VettingInfo(dir, fullPath, value));
        }

        /** @return an iterator over the paths that have at least one submission */
        public Iterator<String> iterator() {
            return pathToVettingInfo.keySet().iterator();
        }

        /** @return the submissions for the path, or null when there are none */
        public Collection<VettingInfo> get(String path) {
            return pathToVettingInfo.get(path);
        }
    }

    /**
     * @return the locales for which at least one vetting file was registered
     */
    public Set<String> keySet() {
        return localeFiles.keySet();
    }

    /**
     * Merges all submissions for one locale. For every path on which the submissions agree
     * (same full path and value), the value is added to a new file that is written to
     * {@code targetDir}; every other path is recorded in the conflict set.
     *
     * @param locale a locale returned by {@link #keySet()}
     * @param targetDir directory that receives {@code <locale>.xml}; it is concatenated with the
     *            file name for the log messages, so it is expected to end with a separator
     * @throws IllegalArgumentException if no vetting file was registered for the locale
     * @throws IOException if reading a submission or writing the merged file fails
     */
    public void incorporateVetting(String locale, String targetDir) throws IOException {
        Set<String> sourceDirs = localeFiles.get(locale);
        if (sourceDirs == null) {
            throw new IllegalArgumentException("No vetting files registered for locale: " + locale);
        }
        Log.logln("Vetting Data for: " + locale);
        VettingInfoSet accum = readSubmissions(locale, sourceDirs);

        // now walk though items. If there is a single value, keep it
        // otherwise show
        Set<VettingInfo> uniquePathAndValue = new TreeSet<VettingInfo>(PathAndValueComparator);
        CLDRFile cldrDelta = SimpleFactory.makeFile(locale);
        boolean gotOne = false;
        for (Iterator<String> it = accum.iterator(); it.hasNext();) {
            String path = it.next();
            Collection<VettingInfo> submissions = accum.get(path);
            uniquePathAndValue.clear();
            uniquePathAndValue.addAll(submissions);
            if (uniquePathAndValue.size() == 1) { // no conflict
                VettingInfo vi = uniquePathAndValue.iterator().next();
                cldrDelta.add(vi.fullPath, vi.value);
                gotOne = true;
            } else { // there is a conflict
                conflictSet.add(new Object[] { locale, path, submissions });
            }
        }

        if (gotOne) {
            Log.logln("Writing: " + targetDir + locale + ".xml");
            // try-with-resources so the writer is closed even if writing fails part-way
            try (PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, locale + ".xml")) {
                cldrDelta.write(pw);
            }
        } else {
            Log.logln("No data left in: " + targetDir + locale + ".xml");
        }
    }

    /**
     * Repairs and reads the submission for the locale in each source directory, and returns all
     * usable values grouped by path (with any proposed alt attribute stripped).
     */
    private VettingInfoSet readSubmissions(String locale, Set<String> sourceDirs) throws IOException {
        VettingInfoSet accum = new VettingInfoSet();
        String fixedLocale = "fixed-" + locale + ".xml";
        for (String sourceDir : sourceDirs) {
            String dir = sourceDir + File.separator;
            fixXML(dir, locale + ".xml", dir, fixedLocale);
            CLDRFile cldr = SimpleFactory.makeFromFile(dir + fixedLocale, locale, DraftStatus.approved);
            for (Iterator<String> it = cldr.iterator(); it.hasNext();) {
                String path = it.next();
                String value = cldr.getStringValue(path);
                String fullPath = cldr.getFullXPath(path);
                // skip bogus values (a missing value is treated like an empty one)
                if (value == null || value.length() == 0 || value.startsWith("//ldml")) {
                    Log.logln("Skipping: [" + value + "] for " + fullPath);
                    continue;
                }
                accum.add(stripAlt(path), dir, stripAlt(fullPath), value);
            }
        }
        return accum;
    }

    /**
     * Logs every registered locale with the directories that hold a file for it. The locale is
     * printed only on the first line of its group.
     */
    public void showSources() {
        for (Map.Entry<String, Set<String>> entry : localeFiles.entrySet()) {
            String label = entry.getKey();
            for (String dir : entry.getValue()) {
                Log.logln(label + " \t" + dir);
                label = "";
            }
        }
    }

    /**
     * Copies an XML file line by line. Inside {@code <localeDisplayNames>}, it writes a wrapper
     * element ({@code <languages>}, {@code <scripts>}, {@code <territories>}, {@code <variants>},
     * {@code <keys>}, {@code <types>}) before the first line that starts with the corresponding
     * item tag, and closes the wrapper before the first following line that does not start with
     * that tag. Each wrapper is written at most once per file.
     *
     * @throws IOException if the input cannot be read or either file cannot be opened
     */
    public void fixXML(String inputDir, String inputFile, String outputDir, String outputFile) throws IOException {
        int haveLanguages = NOT_STARTED;
        int haveScripts = NOT_STARTED;
        int haveTerritories = NOT_STARTED;
        int haveVariants = NOT_STARTED;
        int haveKeys = NOT_STARTED;
        int haveTypes = NOT_STARTED;
        int inLocaleDisplayNames = NOT_STARTED;

        // try-with-resources so both files are closed even when reading fails part-way
        try (BufferedReader in = FileUtilities.openUTF8Reader(inputDir, inputFile);
            PrintWriter out = FileUtilities.openUTF8Writer(outputDir, outputFile)) {
            String line;
            while ((line = in.readLine()) != null) {
                String trimmed = line.trim();

                // Checked before the state is updated for this line, so the opening tag itself
                // is not examined but the closing tag still is, which closes any open wrapper.
                if (inLocaleDisplayNames == OPEN) {
                    haveLanguages = fixItem(out, haveLanguages, trimmed, "<language ", "languages");
                    haveScripts = fixItem(out, haveScripts, trimmed, "<script ", "scripts");
                    haveTerritories = fixItem(out, haveTerritories, trimmed, "<territory ", "territories");
                    haveVariants = fixItem(out, haveVariants, trimmed, "<variant ", "variants");
                    haveKeys = fixItem(out, haveKeys, trimmed, "<key ", "keys");
                    haveTypes = fixItem(out, haveTypes, trimmed, "<type ", "types");
                }

                if (trimmed.startsWith("<localeDisplayNames")) {
                    inLocaleDisplayNames = OPEN;
                } else if (trimmed.startsWith("</localeDisplayNames")) {
                    inLocaleDisplayNames = CLOSED;
                }

                out.println(line);
            }
        }
    }

    /**
     * Advances the state of one wrapper element for the current line, writing the opening or
     * closing wrapper tag when the state changes.
     *
     * @param out destination of the repaired file
     * @param state current state of the wrapper (NOT_STARTED, OPEN or CLOSED)
     * @param trimmed the current input line without surrounding whitespace
     * @param item prefix that identifies an item of the group, for example {@code "<language "}
     * @param fix name of the wrapper element, for example {@code "languages"}
     * @return the new state of the wrapper
     */
    private int fixItem(PrintWriter out, int state, String trimmed, String item, String fix) {
        if (trimmed.startsWith(item)) {
            if (state == NOT_STARTED) {
                out.println("<" + fix + ">");
                state = OPEN;
            }
            return state;
        }
        if (state == OPEN) {
            out.println("</" + fix + ">");
            state = CLOSED;
        }
        return state;
    }

    /**
     * @return Returns the conflictSet. Each entry is {@code { locale, path, submissions }}.
     */
    public Set<Object[]> getConflictSet() {
        return conflictSet;
    }

    /**
     * Runs the whole process and logs a four-part report: the sources, the merge of every
     * registered locale (which writes the merged files), the conflicts, and the available
     * locales without vetting that still contain a path with a draft attribute.
     *
     * @param cldrFactory source of the current CLDR data
     * @param targetDir directory that receives the merged files
     * @throws IOException if reading a submission or writing a merged file fails
     */
    public void showFiles(Factory cldrFactory, String targetDir) throws IOException {
        english = cldrFactory.make("en", true);

        logHeading("A. Sources");
        showSources();

        logHeading("B. Intermediate Results");
        Set<String> vettedLocales = keySet();
        for (String locale : vettedLocales) {
            incorporateVetting(locale, targetDir);
        }

        logHeading("C. Conflicts");
        showConflicts(cldrFactory);

        logHeading("D. Missing Vetting");
        Set<String> availableLocales = new TreeSet<>(cldrFactory.getAvailable());
        availableLocales.removeAll(vettedLocales);

        for (String locale : availableLocales) {
            CLDRFile cldr = cldrFactory.make(locale, false);
            for (Iterator<String> it = cldr.iterator(); it.hasNext();) {
                String fullPath = cldr.getFullXPath(it.next());
                if (fullPath != null && fullPath.indexOf("[@draft=") >= 0) {
                    // one example per locale is enough
                    Log.logln(locale + " \t" + english.getName(locale) + "\texample: " + fullPath);
                    break;
                }
            }
        }
    }

    /** Logs a section title surrounded by empty lines. */
    private void logHeading(String title) {
        Log.logln("");
        Log.logln(title);
        Log.logln("");
    }

    /**
     * Logs every recorded conflict: the path, the current and English values, and each submitted
     * value with its source. After the conflicts of a locale, the collected emails are logged.
     */
    private void showConflicts(Factory cldrFactory) {
        String lastLocale = "";
        CLDRFile cldr = null;
        Transliterator anyLatin = Transliterator.getInstance("any-latin");
        Set<String> emails = new LinkedHashSet<>();
        String[] pieces = new String[5];

        for (Object[] items : getConflictSet()) {
            String locale = (String) items[0];
            StringBuilder entry = new StringBuilder();
            // "cldr == null" covers a first locale that happens to equal the initial sentinel
            if (cldr == null || !lastLocale.equals(locale)) {
                showSet(emails);
                lastLocale = locale;
                cldr = cldrFactory.make(lastLocale, false);
                entry.append("==========").append(Utility.LINE_SEPARATOR)
                    .append(lastLocale).append(Utility.LINE_SEPARATOR);
            }
            String path = CLDRFile.getDistinguishingXPath((String) items[1], null);
            String current = cldr.getStringValue(path);
            entry.append("\tpath:\t").append(path).append(Utility.LINE_SEPARATOR)
                .append("\tcurrent value:\t").append(getValue(anyLatin, current))
                .append(Utility.LINE_SEPARATOR);
            entry.append("\tEnglish value:\t").append(getValue(anyLatin, english.getStringValue(path)))
                .append(Utility.LINE_SEPARATOR);

            @SuppressWarnings("unchecked") // the third slot is always filled by incorporateVetting
            Collection<VettingInfo> submissions = (Collection<VettingInfo>) items[2];
            for (VettingInfo vi : submissions) {
                entry.append("\t\tvalue:\t").append(getValue(anyLatin, vi.value))
                    .append("\t source: ").append(vi.dir)
                    .append(Utility.LINE_SEPARATOR);
                // get third field, that's the email
                // NOTE(review): this splits on a literal backslash into a fixed five-slot array,
                // while the directory strings are built with File.separator. With another
                // separator, or with deeper paths, the third piece is presumably not the email
                // and the array may be too small; confirm against the expected layout.
                Utility.split(vi.dir, '\\', pieces);
                emails.add(pieces[2]);
            }
            Log.logln(entry.toString());
        }
        showSet(emails);
    }

    /**
     * Logs the collected emails as one comma-separated line that always ends with
     * cldr@unicode.org, then empties the set. Does nothing when the set is empty.
     */
    private void showSet(Set<String> emails) {
        if (emails.isEmpty()) {
            return;
        }
        StringBuilder result = new StringBuilder("Emails:\t");
        for (String email : emails) {
            result.append(email).append(", ");
        }
        result.append("cldr@unicode.org");
        emails.clear();
        Log.logln(result.toString());
    }

    /**
     * Formats a value for the report as {@code <value>}, followed by its transliteration in
     * brackets when the transliteration differs. A missing value is shown as {@code NULL}.
     */
    private String getValue(Transliterator some, String current) {
        if (current == null) {
            current = "NULL";
        }
        String other = some.transliterate(current);
        return "<" + current + ">" + (other.equals(current) ? "" : "\t[" + other + "]");
    }

    /**
     * Returns the path without the alt attribute on its last element when that attribute starts
     * with {@code proposed}; otherwise returns the path unchanged.
     */
    private String stripAlt(String path) {
        XPathParts tempParts = XPathParts.getFrozenInstance(path);
        Map<String, String> x = tempParts.getAttributes(tempParts.size() - 1);
        String value = x.get("alt");
        if (value != null && value.startsWith("proposed")) {
            // NOTE(review): the parts come from getFrozenInstance; if the returned attribute map
            // is unmodifiable, this removal fails at runtime. Confirm, and work on a modifiable
            // copy of the parts if so.
            x.remove("alt");
            return tempParts.toString();
        }
        return path;
    }
}