/*
 ******************************************************************************
 * Copyright (C) 2004, International Business Machines Corporation and        *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
package org.unicode.cldr.tool;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.ArrayComparator;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Log;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;

/**
 * Collects vetting submissions and folds them into per-locale CLDR delta files.
 * <p>
 * A source directory tree is scanned for files named <code>&lt;locale&gt;.xml</code>. For
 * each locale, every submitted file is repaired (see {@link #fixXML}), read, and its values
 * are grouped by path. A path whose submissions all agree is written to the delta file; a
 * path with differing submissions is recorded as a conflict and reported through
 * {@link Log}.
 * <p>
 * The original header of this file also mentions the system property <br>
 * -DSHOW_FILES=&lt;anything&gt;, described as showing all create/open of files. Nothing in
 * this class reads that property; presumably it is honoured by the file utilities.
 */
public class VettingAdder {

    /** Locale name -> set of directories that contain a <code>&lt;locale&gt;.xml</code> file. */
    private Map<String, Set<String>> locale_files = new TreeMap<String, Set<String>>();

    private Comparator<String> scomp = new UTF16.StringComparator();

    /**
     * Conflicts found so far. Each entry is <code>{ locale, path, Collection&lt;VettingInfo&gt; }</code>.
     * <p>
     * NOTE(review): the third comparator is a string comparator while the third array slot
     * holds a Collection. Confirm that ArrayComparator never reaches the third slot for the
     * entries stored here (locale + path is unique per entry).
     */
    private Set<Object[]> conflictSet = new TreeSet<Object[]>(
        new ArrayComparator(new Comparator[] { scomp, scomp, scomp }));

    /** English data used to label conflicts and locales; assigned by {@link #showFiles}. */
    CLDRFile english;

    /** Scratch parser reused by {@link #stripAlt(String)}. */
    XPathParts tempParts = new XPathParts(null, null);

    /**
     * Creates an adder and scans the given directory tree for vetting files.
     *
     * @param sourceDirectory
     *            file or directory to scan (directories are scanned recursively)
     * @throws IOException
     *             if a path cannot be canonicalized or a directory cannot be listed
     */
    public VettingAdder(String sourceDirectory) throws IOException {
        addFiles(sourceDirectory);
    }

    /**
     * Registers a single vetting file, or recurses into a directory.
     * <p>
     * Files whose name starts with "fixed-" (output of {@link #fixXML}) and ".htaccess" are
     * skipped silently; any other non-".xml" file is logged and skipped.
     *
     * @param sourceDirectory
     *            file or directory path
     * @throws IOException
     *             if the path cannot be canonicalized or the directory cannot be listed
     */
    private void addFiles(String sourceDirectory) throws IOException {
        File f = new File(sourceDirectory);
        String canonicalName = f.getCanonicalPath();

        if (!f.isDirectory()) {
            String name = f.getName();
            if (name.startsWith("fixed-")) {
                return; // skip
            }
            if (name.equals(".htaccess")) {
                return; // skip
            }
            if (!name.endsWith(".xml")) {
                Log.logln("Wrong filename format: " + canonicalName);
                return;
            }
            // strip the ".xml" suffix to obtain the locale name
            String localeName = name.substring(0, name.length() - 4);
            Set<String> dirs = locale_files.get(localeName);
            if (dirs == null) {
                dirs = new TreeSet<String>();
                locale_files.put(localeName, dirs);
            }
            dirs.add(f.getParent());
            return;
        }

        String[] subnames = f.list();
        if (subnames == null) {
            // File.list() returns null on an I/O error; report it instead of failing with an NPE
            throw new IOException("Cannot list directory: " + canonicalName);
        }
        for (int i = 0; i < subnames.length; ++i) {
            addFiles(canonicalName + File.separatorChar + subnames[i]);
        }
    }

    /** One submitted value: the directory it came from, its full path, and the value. */
    static class VettingInfo {
        private final String value;
        private final String fullPath;
        private final String dir;

        /**
         * @param dir
         *            directory of the file that supplied the value
         * @param fullPath
         *            full path of the item
         * @param value
         *            submitted value
         */
        public VettingInfo(String dir, String fullPath, String value) {
            this.value = value;
            this.fullPath = fullPath;
            this.dir = dir;
        }

        @Override
        public String toString() {
            return "source: " + dir + ";\t value: <" + value + ">";
        }

        /**
         * Orders by full path, then by value. The source directory is ignored, so two
         * submissions of the same value for the same full path compare as equal.
         *
         * @param other
         *            item to compare with
         * @return negative, zero or positive as for {@link Comparable#compareTo}
         */
        public int compareByPathAndValue(VettingInfo other) {
            int result = fullPath.compareTo(other.fullPath);
            if (result != 0) {
                return result;
            }
            return value.compareTo(other.value);
        }
    }

    /** Comparator form of {@link VettingInfo#compareByPathAndValue(VettingInfo)}. */
    static Comparator<VettingInfo> PathAndValueComparator = new Comparator<VettingInfo>() {
        public int compare(VettingInfo o1, VettingInfo o2) {
            return o1.compareByPathAndValue(o2);
        }
    };

    /** Submitted values grouped by path, with paths kept in sorted order. */
    static class VettingInfoSet {
        private Map<String, List<VettingInfo>> path_vettingInfoList = new TreeMap<String, List<VettingInfo>>();

        /**
         * Records one submission under the given path.
         *
         * @param path
         *            grouping key
         * @param dir
         *            directory of the file that supplied the value
         * @param fullPath
         *            full path of the item
         * @param value
         *            submitted value
         */
        public void add(String path, String dir, String fullPath, String value) {
            List<VettingInfo> infos = path_vettingInfoList.get(path);
            if (infos == null) {
                infos = new ArrayList<VettingInfo>(1);
                path_vettingInfoList.put(path, infos);
            }
            infos.add(new VettingInfo(dir, fullPath, value));
        }

        /** @return an iterator over the recorded paths, in sorted order */
        public Iterator<String> iterator() {
            return path_vettingInfoList.keySet().iterator();
        }

        /**
         * @param path
         *            grouping key
         * @return the submissions recorded for the path, or null if there are none
         */
        public Collection<VettingInfo> get(String path) {
            return path_vettingInfoList.get(path);
        }
    }

    /** @return the names of all locales for which at least one vetting file was found */
    public Set<String> keySet() {
        return locale_files.keySet();
    }

    /**
     * Merges all vetting files of one locale.
     * <p>
     * Every source file is first repaired into <code>fixed-&lt;locale&gt;.xml</code> in the
     * same directory and then read. Paths with exactly one distinct (full path, value) pair
     * go into the delta file <code>targetDir + locale + ".xml"</code>; all other paths are
     * added to the conflict set. No file is written when every path conflicts.
     *
     * @param locale
     *            locale name, one of {@link #keySet()}
     * @param targetDir
     *            directory for the delta file
     * @throws IOException
     *             if a file cannot be read or written
     * @throws IllegalArgumentException
     *             if no vetting file was found for the locale
     */
    public void incorporateVetting(String locale, String targetDir) throws IOException {
        Set<String> sourceDirs = locale_files.get(locale);
        if (sourceDirs == null) {
            throw new IllegalArgumentException("No vetting files found for locale: " + locale);
        }
        Log.logln("Vetting Data for: " + locale);

        VettingInfoSet accum = new VettingInfoSet();
        for (String sourceDir : sourceDirs) {
            String dir = sourceDir + File.separator;
            String fixedLocale = "fixed-" + locale + ".xml";
            fixXML(dir, locale + ".xml", dir, fixedLocale);
            CLDRFile cldr = SimpleFactory.makeFromFile(dir + fixedLocale, locale, DraftStatus.approved);
            for (Iterator<String> it = cldr.iterator(); it.hasNext();) {
                String path = it.next();
                String value = cldr.getStringValue(path);
                String fullPath = cldr.getFullXPath(path);
                // skip bogus values (a missing value is treated like an empty one)
                if (value == null || value.length() == 0 || value.startsWith("//ldml")) {
                    Log.logln("Skipping: [" + value + "] for " + fullPath);
                    continue;
                }
                accum.add(stripAlt(path), dir, stripAlt(fullPath), value);
            }
        }

        // Now walk through the items. If there is a single value, keep it;
        // otherwise record the conflict so that it can be shown later.
        Set<VettingInfo> uniquePathAndValue = new TreeSet<VettingInfo>(PathAndValueComparator);
        CLDRFile cldrDelta = SimpleFactory.makeFile(locale);
        boolean gotOne = false;
        for (Iterator<String> it = accum.iterator(); it.hasNext();) {
            String path = it.next();
            Collection<VettingInfo> submissions = accum.get(path);
            uniquePathAndValue.clear();
            uniquePathAndValue.addAll(submissions);
            if (uniquePathAndValue.size() == 1) { // no conflict
                VettingInfo vi = uniquePathAndValue.iterator().next();
                cldrDelta.add(vi.fullPath, vi.value);
                gotOne = true;
            } else { // there is a conflict
                conflictSet.add(new Object[] { locale, path, submissions });
            }
        }

        if (!gotOne) {
            Log.logln("No data left in: " + targetDir + locale + ".xml");
            return;
        }
        Log.logln("Writing: " + targetDir + locale + ".xml");
        PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, locale + ".xml");
        try {
            cldrDelta.write(pw);
        } finally {
            pw.close(); // release the file even if writing fails
        }
    }

    /**
     * Logs every locale with the directories that supplied a file for it. The locale name is
     * printed only on the first line of each group.
     */
    public void showSources() {
        for (String locale : locale_files.keySet()) {
            String label = locale;
            for (String dir : locale_files.get(locale)) {
                Log.logln(label + " \t" + dir);
                label = "";
            }
        }
    }

    /**
     * Copies an XML file line by line. Inside <code>&lt;localeDisplayNames&gt;</code> it
     * emits a wrapper element (languages, scripts, territories, variants, keys, types) around
     * each run of language/script/territory/variant/key/type lines. A wrapper is emitted at
     * most once per kind.
     *
     * @param inputDir
     *            directory of the input file
     * @param inputFile
     *            name of the input file
     * @param outputDir
     *            directory of the output file
     * @param outputFile
     *            name of the output file
     * @throws IOException
     *             if the input cannot be read or the output cannot be opened
     */
    public void fixXML(String inputDir, String inputFile, String outputDir, String outputFile) throws IOException {
        BufferedReader in = FileUtilities.openUTF8Reader(inputDir, inputFile);
        try {
            PrintWriter out = FileUtilities.openUTF8Writer(outputDir, outputFile);
            try {
                // per-kind wrapper state: 0 = not opened yet, 1 = open, 2 = closed
                int haveLanguages = 0, haveScripts = 0, haveTerritories = 0, haveVariants = 0, haveKeys = 0, haveTypes = 0;
                // 0 = before <localeDisplayNames>, 1 = inside, 2 = after
                int inLocaleDisplayNames = 0;
                String line;
                while ((line = in.readLine()) != null) {
                    String trimmed = line.trim();

                    if (inLocaleDisplayNames == 1) {
                        haveLanguages = fixItem(out, haveLanguages, trimmed, "<language ", "languages");
                        haveScripts = fixItem(out, haveScripts, trimmed, "<script ", "scripts");
                        haveTerritories = fixItem(out, haveTerritories, trimmed, "<territory ", "territories");
                        haveVariants = fixItem(out, haveVariants, trimmed, "<variant ", "variants");
                        haveKeys = fixItem(out, haveKeys, trimmed, "<key ", "keys");
                        haveTypes = fixItem(out, haveTypes, trimmed, "<type ", "types");
                    }

                    // the state changes only after the current line has been examined above
                    if (trimmed.startsWith("<localeDisplayNames")) {
                        inLocaleDisplayNames = 1;
                    } else if (trimmed.startsWith("</localeDisplayNames")) {
                        inLocaleDisplayNames = 2;
                    }

                    out.println(line);
                }
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Advances the wrapper state for one kind of item and emits the wrapper tags.
     *
     * @param out
     *            destination for the wrapper tags
     * @param haveLanguages
     *            current state for this kind (used for every kind, not only languages):
     *            0 = wrapper not opened, 1 = open, 2 = closed
     * @param trimmed
     *            current input line without surrounding whitespace
     * @param item
     *            prefix that identifies an item line, e.g. "&lt;language "
     * @param fix
     *            name of the wrapper element, e.g. "languages"
     * @return the new state
     */
    private int fixItem(PrintWriter out, int haveLanguages, String trimmed, String item, String fix) {
        if (trimmed.startsWith(item)) {
            if (haveLanguages == 0) {
                out.println("<" + fix + ">");
                haveLanguages = 1;
            }
            return haveLanguages;
        }
        // first non-item line after a run of items: close the wrapper
        if (haveLanguages == 1) {
            out.println("</" + fix + ">");
            haveLanguages = 2;
        }
        return haveLanguages;
    }

    /**
     * @return Returns the conflictSet.
     */
    public Set<Object[]> getConflictSet() {
        return conflictSet;
    }

    /**
     * Runs the whole process and logs a four-part report: A. sources, B. intermediate
     * results (merging every vetted locale), C. conflicts, D. locales that have draft items
     * but no vetting file.
     *
     * @param cldrFactory
     *            source of the current CLDR data
     * @param targetDir
     *            directory for the delta files
     * @throws IOException
     *             if a vetting file cannot be read or a delta file cannot be written
     */
    public void showFiles(Factory cldrFactory, String targetDir) throws IOException {
        english = cldrFactory.make("en", true);

        Log.logln("");
        Log.logln("A. Sources");
        Log.logln("");
        showSources();

        Log.logln("");
        Log.logln("B. Intermediate Results");
        Log.logln("");
        Set<String> vettedLocales = keySet();
        for (String locale : vettedLocales) {
            incorporateVetting(locale, targetDir);
        }

        Log.logln("");
        Log.logln("C. Conflicts");
        Log.logln("");
        showConflicts(cldrFactory);

        Log.logln("");
        Log.logln("D. Missing Vetting");
        Log.logln("");

        Set<String> availableLocales = new TreeSet<String>(cldrFactory.getAvailable());
        availableLocales.removeAll(vettedLocales);

        for (String locale : availableLocales) {
            CLDRFile cldr = cldrFactory.make(locale, false);
            for (Iterator<String> it = cldr.iterator(); it.hasNext();) {
                String fullPath = cldr.getFullXPath(it.next());
                if (fullPath.indexOf("[@draft=") >= 0) {
                    // one example per locale is enough
                    Log.logln(locale + " \t" + english.getName(locale) + "\texample: " + fullPath);
                    break;
                }
            }
        }
    }

    /**
     * Logs every recorded conflict, grouped by locale, with the current value, the English
     * value and each submitted value. After each locale the collected contributor fields are
     * logged as an email list.
     *
     * @param cldrFactory
     *            source of the current CLDR data
     */
    private void showConflicts(Factory cldrFactory) {
        String lastLocale = "";
        CLDRFile cldr = null;
        Transliterator any_latin = Transliterator.getInstance("any-latin");
        Set<String> emails = new LinkedHashSet<String>();
        String[] pieces = new String[5];

        for (Object[] items : getConflictSet()) {
            StringBuilder entry = new StringBuilder();
            if (!lastLocale.equals(items[0])) {
                // new locale: flush the emails of the previous one and print a header
                showSet(emails);
                lastLocale = (String) items[0];
                cldr = cldrFactory.make(lastLocale, false);
                entry.append("==========").append(Utility.LINE_SEPARATOR)
                    .append(lastLocale).append(Utility.LINE_SEPARATOR);
            }
            String path = CLDRFile.getDistinguishingXPath((String) items[1], null, false);
            String current = cldr.getStringValue(path);
            entry.append("\tpath:\t").append(path).append(Utility.LINE_SEPARATOR)
                .append("\tcurrent value:\t").append(getValue(any_latin, current))
                .append(Utility.LINE_SEPARATOR);

            entry.append("\tEnglish value:\t").append(getValue(any_latin, english.getStringValue(path)))
                .append(Utility.LINE_SEPARATOR);

            // the third slot always holds the collection stored by incorporateVetting
            @SuppressWarnings("unchecked")
            Collection<VettingInfo> submissions = (Collection<VettingInfo>) items[2];
            for (VettingInfo vi : submissions) {
                entry.append("\t\tvalue:\t").append(getValue(any_latin, vi.value))
                    .append("\t source: ").append(vi.dir)
                    .append(Utility.LINE_SEPARATOR);
                // get third field, that's the email; vi.dir is built with the platform
                // separator, so split on that rather than on a hard-coded backslash
                Utility.split(vi.dir, File.separatorChar, pieces);
                emails.add(pieces[2]);
            }

            Log.logln(entry.toString());
        }
        showSet(emails);
    }

    /**
     * Logs the collected emails as one comma-separated line ending with cldr@unicode.org,
     * then empties the set. Does nothing for an empty set.
     *
     * @param emails
     *            emails to log; cleared on return
     */
    private void showSet(Set<String> emails) {
        if (emails.size() == 0) {
            return;
        }
        StringBuilder result = new StringBuilder("Emails:\t");
        for (String email : emails) {
            result.append(email).append(", ");
        }
        result.append("cldr@unicode.org");
        emails.clear();
        Log.logln(result.toString());
    }

    /**
     * Formats a value for the report as <code>&lt;value&gt;</code>, followed by the
     * transliterated form in brackets when it differs from the value.
     *
     * @param some
     *            transliterator to apply
     * @param current
     *            value to format; null is shown as "NULL"
     * @return the formatted value
     */
    private String getValue(Transliterator some, String current) {
        if (current == null) {
            current = "NULL";
        }
        String other = some.transliterate(current);
        if (other.equals(current)) {
            return "<" + current + ">";
        }
        return "<" + current + ">" + "\t[" + other + "]";
    }

    /**
     * Removes the alt attribute from the last element of a path when its value starts with
     * "proposed"; any other path is returned unchanged.
     *
     * @param path
     *            path to process
     * @return the path without a proposed alt attribute on its last element
     */
    private String stripAlt(String path) {
        tempParts.set(path);
        Map<String, String> attributes = tempParts.getAttributes(tempParts.size() - 1);
        String alt = attributes.get("alt");
        if (alt == null || !alt.startsWith("proposed")) {
            return path;
        }
        attributes.remove("alt");
        return tempParts.toString();
    }
}