1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.io.PrintWriter; 6 import java.util.Comparator; 7 import java.util.HashSet; 8 import java.util.Set; 9 import java.util.TreeSet; 10 11 import org.unicode.cldr.draft.FileUtilities; 12 import org.unicode.cldr.util.CLDRFile; 13 import org.unicode.cldr.util.CLDRFile.Status; 14 import org.unicode.cldr.util.CLDRPaths; 15 import org.unicode.cldr.util.CldrUtility; 16 import org.unicode.cldr.util.Counter; 17 import org.unicode.cldr.util.EscapingUtilities; 18 import org.unicode.cldr.util.Factory; 19 import org.unicode.cldr.util.PrettyPath; 20 import org.unicode.cldr.util.SimpleFactory; 21 import org.unicode.cldr.util.Timer; 22 23 import com.ibm.icu.dev.util.CollectionUtilities; 24 import com.ibm.icu.impl.Row; 25 import com.ibm.icu.impl.Row.R2; 26 import com.ibm.icu.text.Collator; 27 import com.ibm.icu.text.NumberFormat; 28 import com.ibm.icu.text.UTF16; 29 30 public class GenerateComparison { 31 32 private static PrettyPath prettyPathMaker; 33 34 private static Collator collator = Collator.getInstance(); 35 36 static class EnglishRowComparator implements Comparator<R2<String, String>> { 37 private static Comparator<String> unicode = new UTF16.StringComparator(true, false, 0); 38 compare(R2<String, String> arg0, R2<String, String> arg1)39 public int compare(R2<String, String> arg0, R2<String, String> arg1) { 40 int result = collator.compare(arg0.get0(), arg1.get0()); 41 if (result != 0) return result; 42 result = unicode.compare(arg0.get0(), arg1.get0()); 43 if (result != 0) return result; 44 result = collator.compare(arg0.get1(), arg1.get1()); 45 if (result != 0) return result; 46 result = unicode.compare(arg0.get1(), arg1.get1()); 47 return result; 48 } 49 } 50 51 static EnglishRowComparator ENG = new EnglishRowComparator(); 52 53 static final String warningMessage = "<p><b>Warning: this chart is still under development. For how to use it, see <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\">Help: How to Vet</a>.</b></p>"; 54 main(String[] args)55 public static void main(String[] args) throws IOException { 56 57 // Setup 58 Timer timer = new Timer(); 59 Timer totalTimer = new Timer(); 60 long totalPaths = 0; 61 format = NumberFormat.getNumberInstance(); 62 format.setGroupingUsed(true); 63 64 Counter<String> totalCounter = new Counter<String>(); 65 66 // Get the args 67 68 String oldDirectory = CldrUtility.getProperty("oldDirectory", new File(CLDRPaths.BASE_DIRECTORY, 69 "common/main").getCanonicalPath() + "/"); 70 String newDirectory = CldrUtility.getProperty("newDirectory", new File(CLDRPaths.BASE_DIRECTORY, 71 "../cldr-release-1-7/common/main").getCanonicalPath() + "/"); 72 String changesDirectory = CldrUtility.getProperty("changesDirectory", new File(CLDRPaths.CHART_DIRECTORY 73 + "/changes/").getCanonicalPath() 74 + "/"); 75 76 String filter = CldrUtility.getProperty("localeFilter", ".*"); 77 boolean SHOW_ALIASED = CldrUtility.getProperty("showAliased", "false").toLowerCase().startsWith("t"); 78 79 // Create the factories 80 81 Factory oldFactory = Factory.make(oldDirectory, filter); 82 Factory newFactory = Factory.make(newDirectory, filter); 83 CLDRFile english = newFactory.make("en", true); 84 CLDRFile newRoot = newFactory.make("root", true); 85 86 // Get the union of all the language locales, sorted by English name 87 88 Set<String> oldList = oldFactory.getAvailableLanguages(); 89 Set<String> newList = newFactory.getAvailableLanguages(); 90 Set<String> unifiedList = new HashSet<String>(oldList); 91 unifiedList.addAll(newList); 92 Set<R2<String, String>> pairs = new TreeSet<R2<String, String>>(); 93 for (String code : unifiedList) { 94 pairs.add(Row.of(english.getName(code), code)); 95 } 96 97 prettyPathMaker = new PrettyPath(); 98 int totalDifferences = 0; 99 int differences = 0; 100 101 Set<R2<String, String>> indexInfo = new TreeSet<R2<String, String>>(ENG); 102 103 // iterate through those 104 for (R2<String, String> pair : pairs) { 105 timer.start(); 106 final String locale = pair.get1(); 107 final String localeName = pair.get0(); 108 System.out.println(locale); 109 differences = 0; 110 System.out.println(); 111 112 // Create CLDR files for both; null if can't open 113 114 CLDRFile oldFile = null; 115 if (oldList.contains(locale)) { 116 try { 117 oldFile = oldFactory.make(locale, true, true); 118 } catch (Exception e) { 119 addToIndex(indexInfo, "ERROR1.6 ", locale, localeName); 120 continue; 121 } 122 } else { 123 oldFile = SimpleFactory.makeFile(locale); // make empty file 124 } 125 CLDRFile newFile = null; 126 if (newList.contains(locale)) { 127 try { 128 newFile = newFactory.make(locale, true, true); 129 } catch (Exception e) { 130 addToIndex(indexInfo, "ERROR1.7 ", locale, localeName); 131 continue; 132 } 133 } else { 134 newFile = SimpleFactory.makeFile(locale); // make empty file 135 } 136 137 // for(String str : newFile) { 138 // String xo = newFile.getFullXPath(str); 139 // String v = newFile.getStringValue(str); 140 // 141 // System.out.println(xo+"\t"+v+"\n"); 142 // 143 // } 144 // Check for null cases 145 146 if (oldFile == null) { 147 addToIndex(indexInfo, "NEW ", locale, localeName); 148 continue; 149 } else if (newFile == null) { 150 addToIndex(indexInfo, "DELETED ", locale, localeName); 151 continue; 152 } 153 System.out.println("*** " + localeName + "\t" + locale); 154 System.out.println(); 155 156 // exclude aliased locales 157 if (newFile.isAliasedAtTopLevel()) { 158 continue; 159 } 160 161 // Get the union of all the paths 162 163 Set<String> paths; 164 try { 165 paths = new HashSet<String>(); 166 CollectionUtilities.addAll(oldFile.iterator(), paths); 167 if (oldList.contains(locale)) { 168 paths.addAll(oldFile.getExtraPaths()); 169 } 170 CollectionUtilities.addAll(newFile.iterator(), paths); 171 if (newList.contains(locale)) { 172 paths.addAll(newFile.getExtraPaths()); 173 } 174 } catch (Exception e) { 175 System.err.println("Locale: " + locale + ", " + localeName); 176 e.printStackTrace(); 177 addToIndex(indexInfo, "ERROR ", locale, localeName); 178 continue; 179 } 180 181 // We now have the full set of all the paths for old and new files 182 // TODO Sort by the pretty form 183 // Set<R2<String,String>> pathPairs = new TreeSet(); 184 // for (String code : unifiedList) { 185 // pairs.add(Row.make(code, english.getName(code))); 186 // } 187 188 // Initialize sets 189 // .addColumn("Code", "class='source'", "<a name=\"{0}\" href='likely_subtags.html#und_{0}'>{0}</a>", 190 // "class='source'", true) 191 192 final String localeDisplayName = english.getName(locale); 193 TablePrinter table = new TablePrinter() 194 .setCaption("Changes in " + localeDisplayName + " (" + locale + ")") 195 .addColumn("PRETTY_SORT1").setSortPriority(1).setHidden(true).setRepeatHeader(true) 196 .addColumn("PRETTY_SORT2").setSortPriority(2).setHidden(true) 197 .addColumn("PRETTY_SORT3").setSortPriority(3).setHidden(true) 198 .addColumn("ESCAPED_PATH").setHidden(true) 199 .addColumn("Inh.").setCellAttributes("class=\"{0}\"").setSortPriority(0).setSpanRows(true) 200 .setRepeatHeader(true) 201 .addColumn("Section").setSpanRows(true).setCellAttributes("class='section'") 202 .addColumn("Subsection").setSpanRows(true).setCellAttributes("class='subsection'") 203 .addColumn("Item").setSpanRows(true).setCellPattern("<a href=\"{4}\">{0}</a>") 204 .setCellAttributes("class='item'") 205 .addColumn("English").setCellAttributes("class='english'") 206 .addColumn("Status").setSortPriority(4).setCellAttributes("class=\"{0}\"") 207 .addColumn("Old" + localeDisplayName).setCellAttributes("class='old'") 208 .addColumn("New" + localeDisplayName).setCellAttributes("class='new'"); 209 Counter<String> fileCounter = new Counter<String>(); 210 211 for (String path : paths) { 212 if (path.contains("/alias") || path.contains("/identity")) { 213 continue; 214 } 215 String cleanedPath = CLDRFile.getNondraftNonaltXPath(path); 216 217 String oldValue = oldFile.getStringValue(cleanedPath); 218 String newValue = newFile.getStringValue(path); 219 String englishValue = english.getStringValue(cleanedPath); 220 221 // for debugging 222 if (oldValue != null && oldValue.contains("{1} {0}")) { 223 System.out.print(""); 224 } 225 226 if (equals(newValue, oldValue)) { 227 continue; 228 } 229 230 // get the actual place the data is stored 231 // AND adjust if the same as root! 232 233 Status newStatus = new Status(); 234 String newFoundLocale = getStatus(newFile, newRoot, path, newValue, newStatus); 235 236 // At this point, we have two unequal values 237 // TODO check for non-distinguishing attribute value differences 238 239 boolean isAliased = false; 240 241 // Skip deletions of alt-proposed 242 243 // if (newValue == null) { 244 // if (path.contains("@alt=\"proposed")) { 245 // continue; 246 // } 247 // } 248 249 // Skip if both inherited from the same locale, since we should catch it 250 // in that locale. 251 252 // Mark as aliased if new locale or path is different 253 if (!newStatus.pathWhereFound.equals(path)) { 254 isAliased = true; 255 // continue; 256 } 257 258 if (!newFoundLocale.equals(locale)) { 259 isAliased = true; 260 // continue; 261 } 262 263 // // skip if old locale or path is aliased 264 // if (!oldFoundLocale.equals(locale)) { 265 // //isAliased=true; 266 // continue; 267 // } 268 // 269 // // Skip if either found path is are different 270 // if (!oldStatus.pathWhereFound.equals(cleanedPath)) { 271 // //isAliased=true; 272 // continue; 273 // } 274 275 // Now check other aliases 276 277 // final boolean newIsAlias = !newStatus.pathWhereFound.equals(path); 278 // if (newIsAlias) { // new is alias 279 // // filter out cases of a new string that is found via alias 280 // if (oldValue == null) { 281 // continue; 282 // } 283 // 284 // } 285 286 if (isAliased && !SHOW_ALIASED) { 287 continue; 288 } 289 290 // We definitely have a difference worth recording, so do so 291 292 String newFullPath = newFile.getFullXPath(path); 293 final boolean reject = newFullPath != null && newFullPath.contains("@draft") 294 && !newFullPath.contains("@draft=\"contributed\""); 295 String status; 296 if (reject) { 297 status = "NOT-ACC"; 298 } else if (newValue == null) { 299 status = "deleted"; 300 } else if (oldValue == null) { 301 status = "added"; 302 } else { 303 status = "changed"; 304 } 305 String coreStatus = status; 306 if (isAliased) { 307 status = "I+" + status; 308 } 309 fileCounter.increment(status); 310 totalCounter.increment(status); 311 312 String pretty_sort = prettyPathMaker.getPrettyPath(cleanedPath); 313 String[] prettyPartsSort = pretty_sort.split("[|]"); 314 if (prettyPartsSort.length != 3) { 315 System.out.println("Bad pretty path: " + pretty_sort + ", original: " + cleanedPath); 316 } 317 String prettySort1 = prettyPartsSort[0]; 318 String prettySort2 = prettyPartsSort[1]; 319 String prettySort3 = prettyPartsSort[2]; 320 321 String pretty = prettyPathMaker.getOutputForm(pretty_sort); 322 String escapedPath = "http://unicode.org/cldr/apps/survey?_=" + locale + "&xpath=" 323 + EscapingUtilities.urlEscape(cleanedPath); 324 String[] prettyParts = pretty.split("[|]"); 325 if (prettyParts.length != 3) { 326 System.out.println("Bad pretty path: " + pretty + ", original: " + cleanedPath); 327 } 328 String pretty1 = prettyParts[0]; 329 String pretty2 = prettyParts[1]; 330 String pretty3 = prettyParts[2]; 331 332 // http://kwanyin.unicode.org/cldr-apps/survey?_=kw_GB&xpath=%2F%2Fldml%2FlocaleDisplayNames%2Flanguages%2Flanguage%5B%40type%3D%22mt%22%5D 333 334 table.addRow() 335 .addCell(prettySort1) 336 .addCell(prettySort2) 337 .addCell(prettySort3) 338 .addCell(escapedPath) 339 .addCell(isAliased ? "I" : "") 340 .addCell(pretty1) 341 .addCell(pretty2) 342 .addCell(pretty3) 343 .addCell(englishValue == null ? "-" : englishValue) 344 .addCell(coreStatus) 345 .addCell(oldValue == null ? "-" : oldValue) 346 .addCell(newValue == null ? "-" : newValue) 347 .finishRow(); 348 349 totalDifferences++; 350 differences++; 351 } 352 353 addToIndex(indexInfo, "", locale, localeName, fileCounter); 354 PrintWriter out = FileUtilities.openUTF8Writer(changesDirectory, locale + ".html"); 355 String title = "Changes in " + localeDisplayName; 356 out.println("<html>" 357 + 358 "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>" 359 + CldrUtility.LINE_SEPARATOR 360 + 361 "<title>" 362 + title 363 + "</title>" 364 + CldrUtility.LINE_SEPARATOR 365 + 366 "<link rel='stylesheet' href='index.css' type='text/css'>" 367 + CldrUtility.LINE_SEPARATOR 368 + 369 "<base target='_blank'>" 370 + CldrUtility.LINE_SEPARATOR 371 + 372 "</head><body>" 373 + CldrUtility.LINE_SEPARATOR 374 + 375 "<h1>" 376 + title 377 + "</h1>" 378 + CldrUtility.LINE_SEPARATOR 379 + "<a href='index.html'>Index</a> | <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>" 380 + warningMessage); 381 382 TablePrinter table2 = new TablePrinter() 383 .setCaption("Totals") 384 .addColumn("Inh.").setSortPriority(0) 385 .addColumn("Status").setSortPriority(1) 386 .addColumn("Total"); 387 388 for (String key : fileCounter.getKeysetSortedByKey()) { 389 boolean inherited = key.startsWith("I+"); 390 table2.addRow() 391 .addCell(inherited ? "I" : "") 392 .addCell(inherited ? key.substring(2) : key) 393 .addCell(format.format(fileCounter.getCount(key))) 394 .finishRow(); 395 } 396 out.println(table2); 397 out.println("<br>"); 398 out.println(table); 399 400 // show status on console 401 402 System.out.println(locale + "\tDifferences:\t" + format.format(differences) 403 + "\tPaths:\t" + format.format(paths.size()) 404 + "\tTime:\t" + timer); 405 406 totalPaths += paths.size(); 407 out.println(ShowData.dateFooter()); 408 out.println(CldrUtility.ANALYTICS); 409 out.println("</body></html>"); 410 out.close(); 411 } 412 PrintWriter indexFile = FileUtilities.openUTF8Writer(changesDirectory, "index.html"); 413 indexFile 414 .println("<html>" 415 + 416 "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>" 417 + CldrUtility.LINE_SEPARATOR 418 + 419 "<title>" 420 + "Change Summary" 421 + "</title>" 422 + CldrUtility.LINE_SEPARATOR 423 + 424 "<link rel='stylesheet' href='index.css' type='text/css'>" 425 + CldrUtility.LINE_SEPARATOR 426 + 427 "<base target='_blank'>" 428 + CldrUtility.LINE_SEPARATOR 429 + 430 "</head><body>" 431 + CldrUtility.LINE_SEPARATOR 432 + 433 "<h1>" 434 + "Change Summary" 435 + "</h1>" 436 + CldrUtility.LINE_SEPARATOR 437 + "<a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>" 438 + warningMessage 439 + "<table><tr>"); 440 441 String separator = ""; 442 int last = 0; 443 for (R2<String, String> indexPair : indexInfo) { 444 int firstChar = indexPair.get0().codePointAt(0); 445 indexFile.append(firstChar == last ? separator 446 : (last == 0 ? "" : "</td></tr>\n<tr>") + "<th>" + String.valueOf((char) firstChar) + "</th><td>") 447 .append(indexPair.get1()); 448 separator = " | "; 449 last = indexPair.get0().codePointAt(0); 450 } 451 indexFile.println("</tr></table>"); 452 indexFile.println(ShowData.dateFooter()); 453 indexFile.println(CldrUtility.ANALYTICS); 454 indexFile.println("</body></html>"); 455 indexFile.close(); 456 457 System.out.println(); 458 459 for (String key : totalCounter.getKeysetSortedByKey()) { 460 System.out.println(key + "\t" + totalCounter.getCount(key)); 461 } 462 463 System.out.println("Total Differences:\t" + format.format(totalDifferences) 464 + "\tPaths:\t" + format.format(totalPaths) 465 + "\tTotal Time:\t" + format.format(totalTimer.getDuration()) + "ms"); 466 } 467 468 // static Transliterator urlHex = Transliterator.createFromRules("foo", 469 // "([^!(-*,-\\:A-Z_a-z~]) > &hex($1) ;" + 470 // ":: null;" + 471 // "'\\u00' > '%' ;" 472 // , Transliterator.FORWARD); 473 474 private static NumberFormat format; 475 addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale, final String localeName)476 private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale, 477 final String localeName) { 478 addToIndex(indexInfo, title, locale, localeName, null); 479 } 480 addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale, final String localeName, Counter<String> fileCounter)481 private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale, 482 final String localeName, Counter<String> fileCounter) { 483 if (title.startsWith("ERROR")) { 484 indexInfo.add(R2.of(localeName, 485 title + " " + localeName + " (" + locale + ")")); 486 return; 487 } 488 String counterString = ""; 489 if (fileCounter != null) { 490 for (String s : fileCounter) { 491 if (counterString.length() != 0) { 492 counterString += "; "; 493 } 494 counterString += s.charAt(0) + ":" + format.format(fileCounter.getCount(s)); 495 } 496 } 497 indexInfo.add(R2.of(localeName, 498 "<a href='" + locale + ".html'>" + title + localeName + " (" + locale + ")</a>" 499 + (counterString.length() == 0 ? "" : " [" + counterString + "]"))); 500 } 501 502 // private static int accumulate(Set<R2<String,String>> rejected, int totalRejected, 503 // final String locale, String indicator, String oldValue, String newValue, String path) { 504 // String pretty = prettyPathMaker.getPrettyPath(path, false); 505 // String line = locale + "\t" + indicator +"\t\u200E[" + oldValue + "]\u200E\t\u200E[" + newValue + "]\u200E\t" + 506 // pretty; 507 // String pretty2 = prettyPathMaker.getOutputForm(pretty); 508 // rejected.add(Row.make(pretty2, line)); 509 // totalRejected++; 510 // return totalRejected; 511 // } 512 getStatus(CLDRFile oldFile, CLDRFile oldRoot, String path, String oldString, Status oldStatus)513 private static String getStatus(CLDRFile oldFile, CLDRFile oldRoot, String path, 514 String oldString, Status oldStatus) { 515 String oldLocale = oldFile.getSourceLocaleID(path, oldStatus); 516 if (!oldLocale.equals("root")) { 517 String oldRootValue = oldRoot.getStringValue(oldStatus.pathWhereFound); 518 if (equals(oldString, oldRootValue)) { 519 oldLocale = "root"; 520 } 521 } 522 return oldLocale; 523 } 524 showSet(PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title)525 private static void showSet(PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title) { 526 if (rejected.size() != 0) { 527 out.println(); 528 out.println(locale + "\t" + title + "\t" + rejected.size()); 529 for (R2<String, String> prettyAndline : rejected) { 530 out.println(prettyAndline.get1()); 531 } 532 } 533 } 534 equals(String newString, String oldString)535 private static boolean equals(String newString, String oldString) { 536 if (newString == null) { 537 return oldString == null; 538 } 539 return newString.equals(oldString); 540 } 541 542 } 543