1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.FileInputStream; 5 import java.io.IOException; 6 import java.io.PrintWriter; 7 import java.io.StringWriter; 8 import java.util.ArrayList; 9 import java.util.Arrays; 10 import java.util.Calendar; 11 import java.util.Collections; 12 import java.util.Date; 13 import java.util.EnumSet; 14 import java.util.HashMap; 15 import java.util.HashSet; 16 import java.util.Iterator; 17 import java.util.LinkedHashMap; 18 import java.util.LinkedHashSet; 19 import java.util.List; 20 import java.util.Locale; 21 import java.util.Map; 22 import java.util.Map.Entry; 23 import java.util.Set; 24 import java.util.TreeMap; 25 import java.util.TreeSet; 26 import java.util.regex.Matcher; 27 import java.util.regex.Pattern; 28 29 import org.unicode.cldr.draft.FileUtilities; 30 import org.unicode.cldr.test.CheckExemplars; 31 import org.unicode.cldr.test.CoverageLevel2; 32 import org.unicode.cldr.test.DisplayAndInputProcessor; 33 import org.unicode.cldr.test.QuickCheck; 34 import org.unicode.cldr.tool.Option.Options; 35 import org.unicode.cldr.util.Builder; 36 import org.unicode.cldr.util.CLDRFile; 37 import org.unicode.cldr.util.CLDRPaths; 38 import org.unicode.cldr.util.Factory; 39 import org.unicode.cldr.util.FileCopier; 40 import org.unicode.cldr.util.LanguageTagParser; 41 import org.unicode.cldr.util.Level; 42 import org.unicode.cldr.util.PathDescription; 43 import org.unicode.cldr.util.PatternCache; 44 import org.unicode.cldr.util.PatternPlaceholders; 45 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo; 46 import org.unicode.cldr.util.PrettyPath; 47 import org.unicode.cldr.util.RegexLookup; 48 import org.unicode.cldr.util.RegexLookup.Finder; 49 import org.unicode.cldr.util.RegexUtilities; 50 import org.unicode.cldr.util.StandardCodes; 51 import org.unicode.cldr.util.StringId; 52 import org.unicode.cldr.util.SupplementalDataInfo; 53 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange; 54 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 55 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 56 import org.unicode.cldr.util.TransliteratorUtilities; 57 import org.unicode.cldr.util.With; 58 import org.unicode.cldr.util.XMLFileReader; 59 import org.unicode.cldr.util.XMLSource; 60 import org.unicode.cldr.util.XPathParts; 61 import org.xml.sax.Attributes; 62 import org.xml.sax.ContentHandler; 63 import org.xml.sax.ErrorHandler; 64 import org.xml.sax.InputSource; 65 import org.xml.sax.Locator; 66 import org.xml.sax.SAXException; 67 import org.xml.sax.SAXParseException; 68 import org.xml.sax.XMLReader; 69 70 import com.google.common.base.Joiner; 71 import com.ibm.icu.impl.Relation; 72 import com.ibm.icu.impl.Row; 73 import com.ibm.icu.impl.Row.R2; 74 import com.ibm.icu.lang.CharSequences; 75 import com.ibm.icu.text.BreakIterator; 76 import com.ibm.icu.text.DateFormat; 77 import com.ibm.icu.text.MessageFormat; 78 import com.ibm.icu.text.PluralRules; 79 import com.ibm.icu.text.SimpleDateFormat; 80 import com.ibm.icu.text.Transform; 81 import com.ibm.icu.text.UnicodeSet; 82 import com.ibm.icu.util.Output; 83 import com.ibm.icu.util.TimeZone; 84 import com.ibm.icu.util.ULocale; 85 86 public class GenerateXMB { 87 private static final String DEBUG_PATH = "[@type=\"day\"]/unitPattern[@count=\"1\"]"; 88 89 static StandardCodes sc = StandardCodes.make(); 90 91 static final String DATE; 92 static { 93 DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); 94 DATE = dateFormat.format(new Date()); 95 } 96 static final String stock = "en|ar|de|es|fr|it|ja|ko|nl|pl|ru|th|tr|pt|zh|zh_Hant|bg|ca|cs|da|el|fa|fi|fil|hi|hr|hu|id|lt|lv|ro|sk|sl|sr|sv|uk|vi|he|nb|et|ms|am|bn|gu|is|kn|ml|mr|sw|ta|te|ur|eu|gl|af|zu|en_GB|es_419|pt_PT|fr_CA|zh_Hant_HK"; 97 private static final HashSet<String> REGION_LOCALES = new HashSet<>(Arrays.asList(stock.split("\\|"))); 98 99 final static Options myOptions = new Options("In normal usage, you set the -t option for the target.") 100 .add("target", ".*", CLDRPaths.TMP_DIRECTORY + "dropbox/xmb/", 101 "The target directory for building. Will generate an English .xmb file, and .wsb files for other languages.") 102 .add( 103 "file", 104 ".*", 105 stock, 106 "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering") 107 // "^(sl|fr)$", 108 .add("path", ".*", "Filter the information based on path name, using a regex argument") 109 // "dates.*(pattern|available)", 110 .add("content", ".*", "Filter the information based on content name, using a regex argument") 111 .add("jason", ".*", "Generate JSON versions instead") 112 .add("zone", null, "Show metazoneinfo and exit") 113 .add("wsb", ".*", "Show metazoneinfo and exit") 114 .add("kompare", ".*", CLDRPaths.BASE_DIRECTORY + "../DATA/cldr/common/google-bulk-imports", 115 "Compare data with directory; generate files in -target.") 116 .add("project_name", 'n', ".*", "CLDR", "The ID of the project."); 117 118 static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance(); 119 // static Matcher contentMatcher; 120 static Matcher pathMatcher; 121 static RegexLookup<String> pathFindRemover = new RegexLookup<String>().loadFromFile(GenerateXMB.class, 122 "xmbSkip.txt"); // .compile("//ldml/dates/calendars/calendar\\[@type=\"(?!gregorian).*").matcher(""); 123 static PrettyPath prettyPath = new PrettyPath(); 124 static int errors = 0; 125 static Relation<String, String> path2errors = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 126 127 // enum Handling {SKIP}; 128 static final Matcher datePatternMatcher = PatternCache.get("dates.*(pattern|available)").matcher(""); 129 130 public static final boolean DEBUG = false; 131 132 private static final HashSet<String> SKIP_LOCALES = new HashSet<>( 133 Arrays.asList(new String[] { "en", "root" })); 134 135 public static String DTD_VERSION; 136 137 private static String projectId; 138 139 enum PlaceholderType { 140 BRACES, // e.g. {NAME} 141 XML, // e.g. <ph name='NAME' /> 142 XML_EXAMPLE // e.g. <ph name='NAME' /><ex>EXAMPLE</ex>{0}</ph> 143 } 144 main(String[] args)145 public static void main(String[] args) throws Exception { 146 myOptions.parse(args, true); 147 Option option; 148 option = myOptions.get("zone"); 149 if (option.doesOccur()) { 150 showMetazoneInfo(); 151 return; 152 } 153 option = myOptions.get("file"); 154 String fileMatcherString = option.getValue(); 155 option = myOptions.get("content"); 156 Matcher contentMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null; 157 option = myOptions.get("path"); 158 pathMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null; 159 160 String targetDir = myOptions.get("target").getValue(); 161 countFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "counts.txt"); 162 163 Factory cldrFactory1 = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 164 CLDRFile english = cldrFactory1.make("en", true); 165 CLDRFile englishTop = cldrFactory1.make("en", false); 166 DTD_VERSION = englishTop.getDtdVersion(); 167 168 CLDRFile root = cldrFactory1.make("en", true); 169 170 showDefaultContents(targetDir, english); 171 EnglishInfo englishInfo = new EnglishInfo(targetDir, english, root); 172 173 option = myOptions.get("kompare"); 174 if (option.doesOccur()) { 175 compareDirectory = option.getValue(); 176 compareFiles(fileMatcherString, contentMatcher, targetDir, cldrFactory1, english, englishInfo); 177 return; 178 } 179 180 if (myOptions.get("wsb").doesOccur()) { 181 displayWsb(myOptions.get("wsb").getValue(), englishInfo); 182 return; 183 } 184 185 projectId = myOptions.get("project_name").getValue(); 186 187 writeFile(targetDir, "en", englishInfo, english, true, false); 188 writeFile(targetDir + "/filtered/", "en", englishInfo, english, true, true); 189 190 // TODO: 191 // Replace {0}... with placeholders (Mostly done, but need better examples) 192 // Replace datetime fields (MMM, L, ...) with placeholders 193 // Skip items that we don't need translated (most language names, script names, deprecated region names, etc. 194 // Add descriptions 195 // Add pages with detailed descriptions, and links from the descriptions 196 // Represent the items with count= as ICUSyntax 197 // Filter items that we don't want to get translated, and add others that we need even if not in English 198 // Rewire items that are in undistinguished attributes 199 // Test each xml file for validity 200 // Generate strings that let the user choose the placeholder style hh vs HH,...??? 201 202 Factory cldrFactory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, fileMatcherString); 203 LanguageTagParser ltp = new LanguageTagParser(); 204 205 for (String file : cldrFactory2.getAvailable()) { 206 if (SKIP_LOCALES.contains(file)) { 207 continue; 208 } 209 210 // skip all locales with regions (with certain exceptions) 211 if (ltp.set(file).getRegion().length() != 0) { 212 if (!REGION_LOCALES.contains(file)) { 213 continue; 214 } 215 } 216 217 // skip anything without plural rules 218 final PluralInfo plurals = supplementalDataInfo.getPlurals(file, false); 219 if (plurals == null) { 220 System.out.println("Skipping " + file + ", no plural rules"); 221 continue; 222 } 223 224 CLDRFile cldrFile = cldrFactory2.make(file, true); 225 writeFile(targetDir + "/wsb/", file, englishInfo, cldrFile, false, false); 226 writeFile(targetDir + "/wsb/filtered/", file, englishInfo, cldrFile, false, true); 227 countFile.flush(); 228 } 229 countFile.close(); 230 PrintWriter errorFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "errors.txt"); 231 for (Entry<String, Set<String>> entry : path2errors.keyValuesSet()) { 232 errorFile.println(entry); 233 } 234 errorFile.close(); 235 System.out.println("Errors: " + (errors + path2errors.size())); 236 } 237 compareFiles(String fileMatcherString, Matcher contentMatcher, String targetDir, Factory cldrFactory1, CLDRFile english, EnglishInfo englishInfo)238 private static void compareFiles(String fileMatcherString, Matcher contentMatcher, String targetDir, 239 Factory cldrFactory1, CLDRFile english, 240 EnglishInfo englishInfo) throws IOException { 241 SubmittedPathFixer fixer = new SubmittedPathFixer(); 242 Factory cldrFactory2 = Factory.make(compareDirectory, fileMatcherString); 243 PrintWriter output = null; 244 PrintWriter log = FileUtilities.openUTF8Writer(targetDir + "/log/", "skipped.txt"); 245 246 for (String file : cldrFactory2.getAvailable()) { 247 // System.out.println("Checking " + file); 248 CLDRFile submitted = cldrFactory2.make(file, false); 249 CLDRFile trunk = cldrFactory1.make(file, true); 250 for (String path : With.in(submitted.iterator(null, submitted.getComparator()))) { 251 if (pathMatcher != null && !pathMatcher.reset(path).matches()) { 252 continue; 253 } 254 String submittedValue = submitted.getStringValue(path); 255 if (contentMatcher != null && !contentMatcher.reset(submittedValue).matches()) { 256 continue; 257 } 258 PathStatus pathStatus = shouldSkipPath(path, submittedValue); 259 if (pathStatus == PathStatus.SKIP) { 260 continue; 261 } 262 263 // fix alt 264 String trunkPath = fixer.fix(path, false); 265 String trunkValue = trunk.getStringValue(trunkPath); 266 if (CharSequences.equals(submittedValue, trunkValue)) { 267 continue; 268 } 269 if (output == null) { 270 output = FileUtilities.openUTF8Writer(targetDir, file + ".txt"); 271 output.println("ID\tEnglish\tSource\tRelease\tDescription"); 272 } 273 String englishValue = english.getStringValue(trunkPath); 274 final PathInfo pathInfo = englishInfo.getPathInfo(trunkPath); 275 String description; 276 if (pathInfo == null) { 277 log.println(file + "\tDescription unavailable for " + trunkPath); 278 errors++; 279 String temp = fixer.fix(path, true); 280 englishInfo.getPathInfo(trunkPath); 281 continue; 282 } else { 283 description = pathInfo.getDescription(); 284 } 285 long id = StringId.getId(trunkPath); 286 if (englishValue == null) { 287 log.println(file + "\tEmpty English for " + trunkPath); 288 errors++; 289 continue; 290 } 291 output.println(id + "\t" + ssquote(englishValue, false) + "\t" + ssquote(submittedValue, false) + "\t" 292 + ssquote(trunkValue, true) + "\t" + description); 293 } 294 if (output != null) { 295 output.close(); 296 output = null; 297 } 298 log.flush(); 299 } 300 log.close(); 301 } 302 303 static Output<String[]> matches = new Output<>(); 304 static List<String> failures = new ArrayList<>(); 305 static Output<Finder> matcherFound = new Output<>(); 306 307 enum PathStatus { 308 SKIP, KEEP, MAYBE 309 } 310 shouldSkipPath(String path, String value)311 public static PathStatus shouldSkipPath(String path, String value) { 312 // skip if 313 List<String> myFailures = null; 314 if (false && path.contains("currencies") && path.contains("symbol")) { 315 myFailures = failures; 316 } 317 String skipPath = pathFindRemover.get(path, null, matches, matcherFound, myFailures); 318 if (myFailures != null && failures.size() != 0) { 319 System.out.println("Failures\n\t" + Joiner.on("\n\t").join(failures)); 320 failures.clear(); 321 } 322 if (skipPath == null || skipPath.equals("MAYBE")) { 323 return PathStatus.MAYBE; 324 } else if (skipPath.equals("VALUE")) { 325 return value.equals(matches.value[1]) ? PathStatus.SKIP : PathStatus.MAYBE; 326 } else if (skipPath.equals("SKIP")) { 327 return PathStatus.SKIP; 328 } else if (skipPath.equals("KEEP")) { 329 return PathStatus.KEEP; 330 } 331 throw new IllegalArgumentException("Unexpected xmbSkip.txt value: " + skipPath); 332 } 333 ssquote(String englishValue, boolean showRemoved)334 private static String ssquote(String englishValue, boolean showRemoved) { 335 if (englishValue == null) { 336 return showRemoved ? "[removed]" : "[empty]"; 337 } 338 englishValue = englishValue.replace("\"", """); 339 return englishValue; 340 } 341 342 static class SubmittedPathFixer { 343 private static final Pattern PATH_FIX = PatternCache.get("\\[@alt=\"" + 344 "(?:proposed|((?!proposed)[-a-zA-Z0-9]*)-proposed)" + 345 "-u\\d+-implicit[0-9.]+" + 346 "(?:-proposed-u\\d+-implicit[0-9.]+)?" + // NOTE: we allow duplicated alt values because of a generation 347 // bug. 348 // -proposed-u971-implicit2.0 349 "\"]"); 350 static Matcher pathFix = PATH_FIX.matcher(""); 351 fix(String path, boolean debug)352 public String fix(String path, boolean debug) { 353 if (pathFix.reset(path).find()) { 354 if (debug) { 355 // debug in case we get a mismatch 356 String temp = "REGEX:\t" + 357 RegexUtilities.showMismatch(PATH_FIX, path.substring(pathFix.start(0))); 358 } 359 final String group = pathFix.group(1); 360 String replacement = group == null ? "" : "[@alt=\"" + group + "\"]"; 361 String trunkPath = path.substring(0, pathFix.start(0)) + replacement + path.substring(pathFix.end(0)); 362 // HACK because of change in CLDR defaults 363 if (trunkPath.startsWith("//ldml/numbers/symbols/")) { 364 trunkPath = "//ldml/numbers/symbols[@numberSystem=\"latn\"]/" 365 + trunkPath.substring("//ldml/numbers/symbols/".length()); 366 } 367 return trunkPath; 368 } 369 return path; 370 } 371 372 } 373 showDefaultContents(String targetDir, CLDRFile english)374 private static void showDefaultContents(String targetDir, CLDRFile english) throws IOException { 375 PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "locales.txt"); 376 String[] locales = stock.split("\\|"); 377 Set<R2<String, String>> sorted = new TreeSet<>(); 378 for (String locale : locales) { 379 if (locale.isEmpty()) continue; 380 String name = english.getName(locale); 381 R2<String, String> row = Row.of(name, locale); 382 sorted.add(row); 383 } 384 Set<String> defaultContents = supplementalDataInfo.getDefaultContentLocales(); 385 386 for (R2<String, String> row : sorted) { 387 String locale = row.get1(); 388 String dlocale = getDefaultContentLocale(locale, defaultContents); 389 out.println(row.get0() + "\t" + locale + "\t" + english.getName(dlocale) + "\t" + dlocale); 390 } 391 out.close(); 392 } 393 getDefaultContentLocale(String locale, Set<String> defaultContents)394 private static String getDefaultContentLocale(String locale, Set<String> defaultContents) { 395 String best = null; 396 for (String s : defaultContents) { 397 if (s.startsWith(locale)) { 398 if (best == null) { 399 best = s; 400 } else if (s.length() < best.length()) { 401 best = s; 402 } 403 } 404 } 405 if (best == null) { 406 return locale; 407 } 408 return best; 409 } 410 411 static final Pattern COUNT_OR_ALT_ATTRIBUTE = PatternCache.get("\\[@(count)=\"([^\"]*)\"]"); 412 static final Pattern PLURAL_XPATH = Pattern 413 .compile("//ldml/(units/unit|numbers/(decimal|currency)Formats).*\\[@count=\"\\w+\"].*"); 414 static final Pattern SKIP_EXEMPLAR_TEST = PatternCache.get( 415 "/(currencySpacing" 416 + "|hourFormat" 417 + "|exemplarCharacters" 418 + "|pattern" 419 + "|localizedPatternChars" 420 + "|segmentations" 421 + "|dateFormatItem" 422 + "|references" 423 + "|unitPattern" 424 + "|intervalFormatItem" 425 + "|localeDisplayNames/variants/" 426 + "|commonlyUsed" 427 + "|currency.*/symbol" 428 + "|symbols/(exponential|nan))"); 429 430 static final Matcher skipExemplarTest = SKIP_EXEMPLAR_TEST.matcher(""); 431 static final UnicodeSet ASCII_LATIN = new UnicodeSet("[A-Za-z]").freeze(); 432 static final UnicodeSet LATIN = new UnicodeSet("[:sc=Latn:]").freeze(); 433 434 static final Matcher keepFromRoot = PatternCache.get("/(exemplarCity|currencies/currency.*/symbol)").matcher(""); 435 static final Matcher currencyDisplayName = Pattern 436 .compile("/currencies/currency\\[@type=\"([^\"]*)\"]/displayName").matcher(""); 437 writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile, boolean isEnglish, boolean filter)438 private static void writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile, 439 boolean isEnglish, boolean filter) throws IOException { 440 441 String extension = "xml"; 442 Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 443 Set<String> seenStarred = new HashSet<>(); 444 445 Relation<String, Row.R2<PathInfo, String>> countItems = Relation.of( 446 new TreeMap<String, Set<Row.R2<PathInfo, String>>>(), TreeSet.class); 447 Matcher countMatcher = COUNT_OR_ALT_ATTRIBUTE.matcher(""); 448 int lineCount = 0; 449 int wordCount = 0; 450 int messageCount = 0; 451 452 StringWriter buffer = new StringWriter(); 453 PrintWriter out1 = new PrintWriter(buffer); 454 StringWriter buffer3 = new StringWriter(); 455 PrintWriter out3 = new PrintWriter(buffer3); 456 UnicodeSet exemplars = getExemplars(cldrFile); 457 458 for (PathInfo pathInfo : englishInfo) { 459 if (false && pathInfo.id == 46139888945574604L) { // for debugging 460 System.out.println("?"); 461 } 462 String path = pathInfo.getPath(); 463 String value; 464 if (isEnglish) { 465 value = pathInfo.englishValue; 466 } else { 467 value = cldrFile.getStringValue(path); 468 } 469 // Remove quotes from number formats (we'll put them back in during 470 // post-processing). 471 // TODO: we should actually call daip.processForDisplay() here, but 472 // it does more stuff than we need it to do, e.g. stripping the 473 // brackets from exemplarCharacters. 474 if (DisplayAndInputProcessor.NUMBER_FORMAT_XPATH.matcher(path).matches()) { 475 value = value.replace("'", ""); 476 } 477 478 // skip root if not English 479 if (!isEnglish && value != null && !keepFromRoot.reset(path).find()) { // note that mismatched script will 480 // be checked later 481 String locale = cldrFile.getSourceLocaleID(path, null); 482 if (locale.equals("root")) { 483 reasonsToPaths.put("root", path + "\t" + value); 484 continue; 485 } 486 if (locale.equals(XMLSource.CODE_FALLBACK_ID)) { 487 reasonsToPaths.put("codeFallback", path + "\t" + value); 488 continue; 489 } 490 } 491 boolean hasPlurals = PLURAL_XPATH.matcher(path).matches(); 492 if (filter && !hasPlurals) { 493 String starred = pathInfo.getStarredPath(); 494 if (seenStarred.contains(starred)) { 495 continue; 496 } 497 seenStarred.add(starred); 498 } 499 if (value == null) { 500 reasonsToPaths.put("missing", path + " " + value); 501 continue; 502 } 503 if (!isEnglish) { 504 String fullPath = cldrFile.getFullXPath(path); 505 if (fullPath.contains("draft")) { 506 XPathParts xpathParts = XPathParts.getFrozenInstance(fullPath); 507 String draftValue = xpathParts.getAttributeValue(-1, "draft"); 508 if (!draftValue.equals("contributed")) { 509 reasonsToPaths.put(draftValue, path + "\t" + value); 510 continue; 511 } 512 } 513 } 514 if (!isEnglish 515 && !exemplars.containsAll(value) 516 && !skipExemplarTest.reset(path).find()) { 517 // check for special cases in currency names. If the code itself occurs in the name, that's ok 518 // ldml/numbers/currencies/currency[@type="XXX"]/displayName 519 boolean bad = true; 520 if (currencyDisplayName.reset(path).find()) { 521 String code = currencyDisplayName.group(1); 522 String value2 = value.replace(code, ""); 523 bad = !exemplars.containsAll(value2); 524 } 525 if (bad) { 526 UnicodeSet diff = new UnicodeSet().addAll(value).removeAll(exemplars); 527 reasonsToPaths.put("exemplars", path + "\t" + value + "\t" + diff); 528 continue; 529 } 530 } 531 // String fullPath = cldrFile.getStringValue(path); 532 // //ldml/units/unit[@type="day"]/unitPattern[@count="one"] 533 if (hasPlurals) { 534 countMatcher.reset(path).find(); 535 String countLessPath = countMatcher.replaceAll(""); 536 countItems.put(countLessPath, Row.of(pathInfo, value)); 537 continue; 538 } 539 if (!isEnglish && pathInfo.changedEnglish) { 540 reasonsToPaths.put("changed-english", path); 541 } else { 542 writePathInfo(out1, pathInfo, value, isEnglish); 543 messageCount++; 544 } 545 if (isEnglish) { 546 writeJavaInfo(out3, pathInfo.getStringId(), pathInfo.getPath(), value); 547 } 548 wordCount += pathInfo.wordCount; 549 ++lineCount; 550 } 551 R2<Integer, Integer> lineWordCount = writeCountPathInfo(out1, out3, cldrFile.getLocaleID(), countItems, 552 isEnglish, filter); 553 messageCount += lineWordCount.get0(); 554 lineCount += lineWordCount.get0(); 555 wordCount += lineWordCount.get1(); 556 if (!filter && countItems.size() != lineWordCount.get0().intValue()) { 557 System.out.println(localeId + "\t" + countItems.size() + "\t" + lineWordCount.get0().intValue()); 558 } 559 out1.flush(); 560 out3.flush(); 561 562 String file = LanguageCodeConverter.toGoogleLocaleId(localeId); 563 String localeName = englishInfo.getName(localeId); 564 PrintWriter out = FileUtilities.openUTF8Writer(targetDir, file + "." + extension); 565 566 if (isEnglish) { 567 FileCopier.copy(GenerateXMB.class, "xmb-dtd.xml", out); 568 // FileUtilities.appendFile(GenerateXMB.class, "xmb-dtd.xml", out); 569 out.println("<!-- " + localeName + " -->"); 570 out.println("<messagebundle class='" + projectId + "'> <!-- version: " + DTD_VERSION + ", date: " + DATE 571 + " -->"); 572 out.println(buffer.toString()); 573 out.println("</messagebundle>"); 574 575 PrintWriter out3File = FileUtilities.openUTF8Writer(targetDir, "IdToPath.java"); 576 out3File.println("package org.unicode.cldr.tool;"); 577 out3File.println(); 578 out3File.println("import java.util.HashMap;"); 579 out3File.println(); 580 out3File.println("/**"); 581 out3File.println(" * Autogenerated by GenerateXMB for use by ConvertXTB."); 582 out3File.println(" * Do not manually edit this file."); 583 out3File.println(" */"); 584 out3File.println("public class IdToPath {"); 585 out3File.println(" static final HashMap<String,String> map = new HashMap<String,String>();"); 586 out3File.println(" public static String getPath(String id) {"); 587 out3File.println(" return map.get(id);"); 588 out3File.println(" }"); 589 out3File.println(" static {"); 590 out3File.println(" String[][] data = {"); 591 out3File.println(buffer3); 592 out3File.println(" };"); 593 out3File.println(" for (String[] pair : data) {"); 594 out3File.println(" map.put(pair[0], pair[1]);"); 595 out3File.println(" }"); 596 out3File.println(" }"); 597 out3File.println("}"); 598 out3File.close(); 599 } else { 600 601 // FileUtilities.appendFile(GenerateXMB.class, "wsb-dtd.xml", out); 602 FileCopier.copy(GenerateXMB.class, "wsb-dtd.xml", out); 603 out.println("<!-- " + localeName + " -->"); 604 out.println("<worldserverbundles lazarus_id='dummy' date='" + DATE + "'> <!-- version: " + DTD_VERSION 605 + " -->"); 606 out.println(" <worldserverbundle project_id='" + projectId + "' message_count='" + messageCount + "'>"); 607 out.println(buffer.toString()); 608 out.println(" </worldserverbundle>"); 609 out.println("</worldserverbundles>"); 610 } 611 out.close(); 612 QuickCheck.check(new File(targetDir, file + "." + extension)); 613 if (!filter) { 614 countFile.println(file + "\t" + lineCount + "\t" + wordCount); 615 } 616 if (!isEnglish && !filter) { 617 writeReasons(reasonsToPaths, targetDir, file); 618 } 619 } 620 writeJavaInfo(PrintWriter out3, String id, String path, String value)621 private static void writeJavaInfo(PrintWriter out3, String id, String path, String value) { 622 out3.println(" {\"" + id + "\",\"" + path.replace("\"", "\\\"") + "\",\"" 623 + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\"},"); 624 } 625 getExemplars(CLDRFile cldrFile)626 private static UnicodeSet getExemplars(CLDRFile cldrFile) { 627 UnicodeSet exemplars = cldrFile.getExemplarSet("", CLDRFile.WinningChoice.WINNING); 628 boolean isLatin = exemplars.containsSome(ASCII_LATIN); 629 exemplars.addAll(CheckExemplars.AlwaysOK); 630 UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", CLDRFile.WinningChoice.WINNING); 631 if (auxExemplars != null) { 632 exemplars.addAll(auxExemplars); 633 } 634 if (!isLatin) { 635 exemplars.removeAll(LATIN); 636 } 637 exemplars.freeze(); 638 return exemplars; 639 } 640 641 static final Pattern COUNT_ATTRIBUTE = PatternCache.get("\\[@count=\"([^\"]*)\"]"); 642 static final Pattern PLURAL_NUMBER = PatternCache.get("(decimal|number)Format"); 643 writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale, Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter)644 private static Row.R2<Integer, Integer> writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale, 645 Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter) { 646 Matcher m = COUNT_ATTRIBUTE.matcher(""); 647 int wordCount = 0; 648 PluralInfo pluralInfo = supplementalDataInfo.getPlurals(locale); 649 int lineCount = 0; 650 Set<String> errorSet = new LinkedHashSet<>(); 651 for (Entry<String, Set<R2<PathInfo, String>>> entry : countItems.keyValuesSet()) { 652 String countLessPath = entry.getKey(); 653 Map<String, String> fullValues = new TreeMap<>(); 654 PathInfo pathInfo = null; 655 String value = null; 656 for (R2<PathInfo, String> entry2 : entry.getValue()) { 657 PathInfo pathInfoN = entry2.get0(); 658 m.reset(pathInfoN.getPath()).find(); 659 String count = m.group(1); 660 if (count.equals("other")) { 661 pathInfo = pathInfoN; 662 } 663 value = entry2.get1(); 664 fullValues.put(count, value); 665 } 666 if (pathInfo == null) { 667 continue; 668 } 669 if (fullValues.size() < 2) { 670 // if we don't have two count values, skip 671 System.out.println(locale + "\tMust have 2 count values: " + entry.getKey()); 672 continue; 673 } 674 String fullPlurals = showPlurals(fullValues, locale, pathInfo, pluralInfo, isEnglish, errorSet); 675 if (fullPlurals == null) { 676 System.out.println(locale + "\tCan't format plurals for: " + entry.getKey() + "\t" + errorSet); 677 errors++; 678 continue; 679 } 680 681 out.println(); 682 out.println(" <!-- " 683 // + prettyPath.getPrettyPath(pathInfo.getPath(), false) + " ; " 684 + countLessPath + " -->"); 685 out.println(" <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'"); 686 out.println(" >" + fullPlurals + "</msg>"); 687 // Use the last plural value in the loop because we only need it for example purposes. 688 writeJavaInfo(out3, pathInfo.getStringId(), countLessPath, value); 689 // if (!isEnglish || pathInfo.placeholderReplacements != null) { 690 // out.println("\t<!-- English original:\t" + pathInfo.getEnglishValue() + "\t-->"); 691 // } 692 out.flush(); 693 ++lineCount; 694 wordCount += pathInfo.wordCount * 3; 695 if (filter) { 696 break; 697 } 698 } 699 return Row.of(lineCount, wordCount); 700 } 701 702 static final String[] PLURAL_KEYS = { "=0", "=1", "zero", "one", "two", "few", "many", "other" }; 703 static final String[] EXTRA_PLURAL_KEYS = { "0", "1", "zero", "one", "two", "few", "many" }; 704 showPlurals(Map<String, String> values, String locale, PathInfo pathInfo, PluralInfo pluralInfo, boolean isEnglish, Set<String> errorSet)705 private static String showPlurals(Map<String, String> values, 706 String locale, PathInfo pathInfo, PluralInfo pluralInfo, 707 boolean isEnglish, Set<String> errorSet) { 708 errorSet.clear(); 709 /* 710 * Desired output for English XMB 711 * <msg desc= 712 * "[ICU Syntax] Plural forms for a number of hours. These are special messages: before translating, see cldr.org/translation/plurals." 713 * > 714 * {LENGTH, select, 715 * abbreviated { 716 * {NUMBER_OF_HOURS, plural, 717 * =0 {0 hrs} 718 * =1 {1 hr} 719 * zero {# hrs} 720 * one {# hrs} 721 * two {# hrs} 722 * few {# hrs} 723 * many {# hrs} 724 * other {# hrs}}} 725 * full { 726 * {NUMBER_OF_HOURS, plural, 727 * =0 {0 hours} 728 * =1 {1 hour} 729 * zero {# hours} 730 * one {# hours} 731 * two {# hours} 732 * few {# hours} 733 * many {# hours} 734 * other {# hours}}}} 735 * </msg> 736 * 737 * NOTE: For the WSB, the format has to match the following, WITHOUT LFs 738 * 739 * <msg id='1431840205484292448' desc='[ICU Syntax] who is viewing? This message requires special attention. 740 * Please follow the instructions here: 741 * https://sites.google.com/a/google.com/localization-info-site/Home/training/icusyntax'> 742 * <ph name='[PLURAL_NUM_USERS_OFFSET_1]' ex='Special placeholder used in [ICU Syntax] messages, see 743 * instructions page.'/> 744 * <ph name='[=0]'/>No one else is viewing. 745 * <ph name='[=1]'/><ph name='USERNAME' ex='Bob'/> is viewing. 746 * <ph name='[=2]'/><ph name='USERNAME' ex='Bob'/> and one other are viewing. 747 * <ph name='[ZERO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 748 * <ph name='[ONE]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 749 * <ph name='[TWO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 750 * <ph name='[FEW]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 751 * <ph name='[MANY]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 752 * <ph name='[OTHER]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 753 * <ph name='[END_PLURAL]'/> 754 * </msg> 755 */ 756 Matcher matcher = PLURAL_NUMBER.matcher(pathInfo.getPath()); 757 String var = null; 758 if (matcher.find()) { 759 // Plural doesn't use placeholders so create a label. 760 var = matcher.group(1).toUpperCase() + "_NUMBER"; 761 } else { 762 var = pathInfo.getFirstVariable(); 763 } 764 765 StringBuilder result = new StringBuilder(); 766 if (isEnglish) { 767 result.append('{') 768 // .append("PLURAL_") 769 .append(var).append(",plural,"); 770 } else { 771 result.append("<ph name='[PLURAL_").append(var).append("]'/>"); // ex='Special placeholder used in [ICU 772 // Syntax] messages, see instructions page.' 773 } 774 for (String key : PLURAL_KEYS) { 775 String value; 776 String coreKey = key.startsWith("=") ? key.substring(1, 2) : key; 777 value = values.get(coreKey); 778 if (value == null) { 779 if (key.startsWith("=")) { 780 String stringCount = key.substring(1); 781 // handle both =x case, and the category 782 int intCount = Integer.parseInt(stringCount); 783 Count count = pluralInfo.getCount(intCount); 784 value = values.get(count.toString()); 785 if (value == null) { 786 errorSet.add("Bad key/value " + key + "='" + value + "' in " + values); 787 return null; 788 } 789 value = value.replace("{0}", stringCount); 790 } else { 791 value = values.get("other"); 792 if (value == null) { 793 errorSet.add("No 'other' value in " + values); 794 return null; 795 } 796 } 797 } 798 String newValue = MessageFormat.format(MessageFormat.autoQuoteApostrophe(value), 799 new Object[] { key.startsWith("=") ? key.substring(1, 2) : "#" }); 800 PlaceholderType type = isEnglish ? PlaceholderType.BRACES : PlaceholderType.XML; 801 newValue = pathInfo.transformValue(newValue, type); 802 if (isEnglish) { 803 result.append("\n ").append(key).append(" {").append(newValue).append('}'); 804 } else { 805 String prefix = key.toUpperCase(Locale.ENGLISH); 806 result.append("<!--\n --><ph name='[").append(prefix).append("]'/>").append(newValue); 807 } 808 } 809 if (isEnglish) { 810 result.append('}'); 811 } else { 812 result.append("<!--\n --><ph name='[END_PLURAL]'/>"); 813 } 814 return result.toString(); 815 } 816 writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish)817 private static void writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish) { 818 out.println(); 819 out.println(" <!-- " + pathInfo.getPath() + " -->"); 820 out.println(" <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'"); 821 PlaceholderType type = isEnglish ? PlaceholderType.XML_EXAMPLE : PlaceholderType.XML; 822 String transformValue = pathInfo.transformValue(value, type); 823 out.println(" >" + transformValue + "</msg>"); 824 value = TransliteratorUtilities.toHTML.transform(value); 825 if (!value.equals(transformValue) && (!isEnglish || pathInfo.placeholders != null)) { 826 out.println(" <!-- English original: " + value + " -->"); 827 } 828 out.flush(); 829 } 830 writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename)831 private static void writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename) 832 throws IOException { 833 targetDir += "/skipped/"; 834 filename += ".txt"; 835 PrintWriter out = FileUtilities.openUTF8Writer(targetDir, filename); 836 out.println("# " + DATE); 837 for (Entry<String, Set<String>> reasonToSet : reasonsToPaths.keyValuesSet()) { 838 for (String path : reasonToSet.getValue()) { 839 out.println(reasonToSet.getKey() + " " + path); 840 } 841 } 842 out.close(); 843 } 844 845 static class PathInfo implements Comparable<PathInfo> { 846 private static final Pattern PLACEHOLDER = PatternCache.get("\\{(\\d)}"); 847 848 private final String path; 849 private final Long id; 850 private final String stringId; 851 private final String englishValue; 852 private final boolean changedEnglish; 853 private final Map<String, PlaceholderInfo> placeholders; 854 private final String description; 855 private final String starredPath; 856 private final int wordCount; 857 858 private static final BreakIterator bi = BreakIterator.getWordInstance(ULocale.ENGLISH); 859 private static final UnicodeSet ALPHABETIC = new UnicodeSet("[:Alphabetic:]"); 860 PathInfo(String path, String englishValue, boolean changedEnglish, Map<String, PlaceholderInfo> placeholders, String description, String starredPath)861 public PathInfo(String path, String englishValue, boolean changedEnglish, 862 Map<String, PlaceholderInfo> placeholders, 863 String description, String starredPath) { 864 if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) { 865 int x = 0; 866 } 867 if (description == null) { 868 path2errors.put(path, "missing description"); 869 } 870 this.path = path; 871 long id = StringId.getId(path); 872 this.id = id; 873 stringId = String.valueOf(id); 874 this.englishValue = englishValue; 875 this.changedEnglish = changedEnglish; 876 this.placeholders = placeholders; 877 this.description = description == null ? null : description.intern(); 878 this.starredPath = starredPath; 879 // count words 880 int tempCount = 0; 881 bi.setText(englishValue); 882 int start = bi.first(); 883 for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) { 884 String word = englishValue.substring(start, end); 885 if (ALPHABETIC.containsSome(word)) { 886 ++tempCount; 887 } 888 } 889 wordCount = tempCount == 0 ? 1 : tempCount; 890 } 891 getFirstVariable()892 public String getFirstVariable() { 893 // ... name='FIRST_PART_OF_TEXT' ... 894 PlaceholderInfo info = placeholders.get("{0}"); 895 if (info == null) { 896 throw new IllegalArgumentException("Missing {0} for " + this); 897 } 898 return info.name; 899 } 900 getPath()901 public String getPath() { 902 return path; 903 } 904 getId()905 public Long getId() { 906 return id; 907 } 908 getStringId()909 public String getStringId() { 910 return stringId; 911 } 912 getEnglishValue()913 public String getEnglishValue() { 914 return englishValue; 915 } 916 getDescription()917 public String getDescription() { 918 return description; 919 } 920 getStarredPath()921 public String getStarredPath() { 922 return starredPath; 923 } 924 getPlaceholderReplacementsToOriginal()925 public Map<String, String> getPlaceholderReplacementsToOriginal() { 926 if (placeholders == null) return null; 927 Map<String, String> placeholderOutput = new LinkedHashMap<>(); 928 for (String id : placeholders.keySet()) { 929 placeholderOutput.put(id, getPlaceholderWithExample(id)); 930 } 931 return placeholderOutput; 932 } 933 getPlaceholderWithExample(String placeholder)934 private String getPlaceholderWithExample(String placeholder) { 935 PlaceholderInfo info = placeholders.get(placeholder); 936 // <ph name='x'><ex>xxx</ex>yyy</ph> 937 return "<ph name='" + info.name + "'><ex>" + info.example + "</ex>" + placeholder + "</ph>"; 938 } 939 940 // static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser(); 941 transformValue(String value, PlaceholderType type)942 private String transformValue(String value, PlaceholderType type) { 943 value = TransliteratorUtilities.toHTML.transform(value); 944 if (placeholders == null) return value; 945 946 String placeholderFormat = ""; 947 switch (type) { 948 case BRACES: 949 placeholderFormat = "'{'{0}'}'"; 950 break; 951 case XML: 952 placeholderFormat = "<ph name=''[{0}]'' />"; 953 break; 954 case XML_EXAMPLE: 955 placeholderFormat = "<ph name=''{0}''><ex>{1}</ex>'{'{2}'}'</ph>"; 956 break; 957 } 958 Matcher matcher = PLACEHOLDER.matcher(value); 959 StringBuffer buffer = new StringBuffer(); 960 int start = 0; 961 while (matcher.find()) { 962 buffer.append(value.substring(start, matcher.start())); 963 PlaceholderInfo info = placeholders.get(matcher.group()); 964 buffer.append(MessageFormat.format(placeholderFormat, 965 new Object[] { info.name, info.example, matcher.group(1) })); 966 start = matcher.end(); 967 } 968 buffer.append(value.substring(start)); 969 return buffer.toString(); 970 } 971 replacePlaceholders(String value, String placeholderStart, String placeholderEnd)972 private String replacePlaceholders(String value, String placeholderStart, String placeholderEnd) { 973 Matcher matcher = PLACEHOLDER.matcher(value); 974 StringBuffer buffer = new StringBuffer(); 975 int start = 0; 976 while (matcher.find()) { 977 buffer.append(value.substring(start, matcher.start())); 978 String name = placeholders.get(matcher.group()).name; 979 buffer.append(placeholderStart).append(name).append(placeholderEnd); 980 start = matcher.end(); 981 } 982 buffer.append(value.substring(start)); 983 return buffer.toString(); 984 } 985 986 @Override compareTo(PathInfo arg0)987 public int compareTo(PathInfo arg0) { 988 return path.compareTo(arg0.path); 989 } 990 991 @Override toString()992 public String toString() { 993 return path; 994 } 995 } 996 997 static class EnglishInfo implements Iterable<PathInfo> { 998 999 final Map<String, PathInfo> pathToPathInfo = new TreeMap<>(); 1000 final Map<Long, PathInfo> longToPathInfo = new HashMap<>(); 1001 final CLDRFile english; 1002 getPathInfo(long hash)1003 PathInfo getPathInfo(long hash) { 1004 return longToPathInfo.get(hash); 1005 } 1006 getName(String localeId)1007 public String getName(String localeId) { 1008 return english.getName(localeId); 1009 } 1010 getPathInfo(String path)1011 PathInfo getPathInfo(String path) { 1012 return pathToPathInfo.get(path); 1013 } 1014 EnglishInfo(String targetDir, CLDRFile english, CLDRFile root)1015 EnglishInfo(String targetDir, CLDRFile english, CLDRFile root) throws Exception { 1016 1017 Map<String, String> oldPathValueMap = ReadXMB.load(CLDRPaths.BASE_DIRECTORY + 1018 "/cldr-tools/org/unicode/cldr/unittest/data/xmb/", 1019 "en.xml"); 1020 1021 PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance(); 1022 1023 this.english = english; 1024 // we don't want the fully resolved paths, but we do want the direct inheritance from root. 1025 //Status status = new Status(); 1026 Map<String, List<Set<String>>> starredPaths = new TreeMap<>(); 1027 1028 HashSet<String> metazonePaths = new HashSet<>(); 1029 // ^//ldml/dates/timeZoneNames/metazone\[@type="([^"]*)"] 1030 for (MetazoneInfo metazoneInfo : MetazoneInfo.METAZONE_LIST) { 1031 for (String item : metazoneInfo.getTypes()) { 1032 String path = "//ldml/dates/timeZoneNames/metazone[@type=\"" + metazoneInfo.metazoneId + "\"]" 1033 + item; 1034 metazonePaths.add(path); 1035 } 1036 } 1037 1038 // TODO add short countries 1039 HashSet<String> extraLanguages = new HashSet<>(); 1040 // ldml/localeDisplayNames/languages/language[@type=".*"] 1041 1042 for (String langId : PathDescription.EXTRA_LANGUAGES) { 1043 String langPath = "//ldml/localeDisplayNames/languages/language[@type=\"" + langId + "\"]"; 1044 extraLanguages.add(langPath); 1045 } 1046 1047 Set<String> sorted = Builder.with(new TreeSet<String>()) 1048 .addAll(english) 1049 .removeAll( 1050 new Transform<String, Boolean>() { 1051 @Override 1052 public Boolean transform(String source) { 1053 return source.startsWith("//ldml/dates/timeZoneNames/metazone") ? Boolean.TRUE 1054 : Boolean.FALSE; 1055 } 1056 }) 1057 .get(); 1058 sorted.addAll(metazonePaths); 1059 if (DEBUG) { 1060 TreeSet<String> diffs = new TreeSet<>(extraLanguages); 1061 diffs.removeAll(sorted); 1062 System.out.println(diffs); 1063 } 1064 sorted.addAll(extraLanguages); 1065 1066 // add the extra Count items. 1067 Map<String, String> extras = new HashMap<>(); 1068 Matcher m = COUNT_ATTRIBUTE.matcher(""); 1069 1070 for (String path : sorted) { 1071 if (path.contains("[@count=\"")) { 1072 m.reset(path).find(); 1073 for (String key : EXTRA_PLURAL_KEYS) { 1074 String path2 = path.substring(0, m.start(1)) + key + path.substring(m.end(1)); 1075 extras.put(path2, path); 1076 } 1077 } 1078 // if (path.contains("ellipsis")) { 1079 // System.out.println(path); 1080 // } 1081 } 1082 sorted.addAll(extras.keySet()); 1083 1084 Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 1085 Set<String> missingDescriptions = new TreeSet<>(); 1086 //Output<String[]> pathArguments = new Output<String[]>(); 1087 1088 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en"); 1089 RegexLookup<Boolean> coverageAllow = new RegexLookup<Boolean>() 1090 .add("^//ldml/localeDisplayNames/keys/key", true) 1091 .add("^//ldml/localeDisplayNames/languages/language\\[@type=\"(jv|zxx|gsw|eo)\"]", true) 1092 .add("^//ldml/localeDisplayNames/scripts/script", true) 1093 .add("^//ldml/localeDisplayNames/types/type", true) 1094 .add( 1095 "^//ldml/dates/calendars/calendar\\[@type=\"[^\"]*\"]/dayPeriods/dayPeriodContext\\[@type=\"format\"]", 1096 true); 1097 1098 // TODO: for each count='other' path, add the other keywords and values 1099 PathDescription pathDescription = new PathDescription(GenerateXMB.supplementalDataInfo, english, extras, 1100 starredPaths, PathDescription.ErrorHandling.SKIP); 1101 1102 for (String path : sorted) { 1103 if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) { 1104 int x = 0; 1105 } 1106 String value = english.getStringValue(path); 1107 Level level = coverageLevel.getLevel(path); 1108 if (value == null) { 1109 value = "[EMPTY]"; 1110 addSkipReasons(reasonsToPaths, "empty-value", level, path, value); 1111 continue; 1112 } 1113 if (pathMatcher != null 1114 && !pathMatcher.reset(path).find()) { 1115 addSkipReasons(reasonsToPaths, "path-parameter", level, path, value); 1116 continue; 1117 } 1118 PathStatus pathStatus = shouldSkipPath(path, value); 1119 if (pathStatus == PathStatus.SKIP) { 1120 addSkipReasons(reasonsToPaths, "path-remove", level, path, value); 1121 continue; 1122 } 1123 1124 if (level.compareTo(Level.MODERN) > 0 && pathStatus != PathStatus.KEEP) { 1125 if (coverageAllow.get(path) == null) { // HACK 1126 addSkipReasons(reasonsToPaths, "coverage", level, path, value); 1127 continue; 1128 } else { 1129 addSkipReasons(reasonsToPaths, "coverage*", level, path, value); 1130 continue; 1131 // System.out.println("Not skipping " + path); 1132 } 1133 } 1134 1135 String description = pathDescription.getDescription(path, value, level, null); 1136 EnumSet<PathDescription.Status> descriptionStatus = pathDescription.getStatus(); 1137 if (!descriptionStatus.isEmpty()) { 1138 addSkipReasons(reasonsToPaths, descriptionStatus.toString(), level, path, value); 1139 description = null; 1140 } else { 1141 description = "[ICU CLDR] " + description; 1142 } 1143 1144 String oldValue = oldPathValueMap.get(path); 1145 boolean changedEnglish = !value.equals(oldValue); 1146 PathInfo row = new PathInfo(path, value, changedEnglish, patternPlaceholders.get(path), description, 1147 pathDescription.getStarredPathOutput()); 1148 1149 if (description == PathDescription.MISSING_DESCRIPTION) { 1150 missingDescriptions.add(pathDescription.getStarredPathOutput()); 1151 } 1152 1153 Long hash = row.getId(); 1154 if (longToPathInfo.containsKey(hash)) { 1155 throw new IllegalArgumentException("Id collision for " 1156 + path + " and " + longToPathInfo.get(hash).getPath()); 1157 } 1158 pathToPathInfo.put(path, row); 1159 longToPathInfo.put(hash, row); 1160 if (value.contains("{0}") && patternPlaceholders.get(path) == null) { 1161 System.out.println("ERROR, no placeholders for {0}...: " + path + " ; " + value); 1162 } 1163 } 1164 1165 PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-paths.txt"); 1166 out.println("# " + DATE); 1167 for (Entry<String, List<Set<String>>> starredPath : starredPaths.entrySet()) { 1168 out.println(starredPath.getKey() + "\t\t" + starredPath.getValue()); 1169 } 1170 out.close(); 1171 out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-missingDescriptions.txt"); 1172 out.println("# " + DATE); 1173 for (String starredPath : missingDescriptions) { 1174 // ^//ldml/dates/timeZoneNames/zone\[@type=".*"]/exemplarCity ; ROOT timezone ; The name of a city in: 1175 // {0}. See cldr.org/xxxx. 1176 out.println(toRegexPath(starredPath) + "\t;\tDESCRIPTION\t" + starredPaths.get(starredPath)); 1177 } 1178 out.close(); 1179 writeReasons(reasonsToPaths, targetDir, "en"); 1180 } 1181 toRegexPath(String starredPath)1182 private String toRegexPath(String starredPath) { 1183 String result = starredPath.replace("[", "\\["); 1184 result = result.replace("\".*\"", "\"([^\"]*)\""); 1185 return "^" + result; 1186 } 1187 1188 @Override iterator()1189 public Iterator<PathInfo> iterator() { 1190 return pathToPathInfo.values().iterator(); 1191 } 1192 } 1193 addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level, String path, String value)1194 static void addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level, 1195 String path, String value) { 1196 reasonsToPaths.put(descriptionStatus + "\t" + level, path + "\t" + value); 1197 } 1198 1199 // Get Date-Time in milliseconds getDateTimeinMillis(int year, int month, int date)1200 private static long getDateTimeinMillis(int year, int month, int date) { 1201 Calendar cal = Calendar.getInstance(); 1202 cal.set(year, month, date); 1203 return cal.getTimeInMillis(); 1204 } 1205 1206 static final long START_TIME = getDateTimeinMillis(2000, 1, 0); 1207 static final long END_TIME = getDateTimeinMillis(2015, 1, 0); 1208 static final long DELTA_TIME = 15 * 60 * 1000; 1209 static final long MIN_DAYLIGHT_PERIOD = 90L * 24 * 60 * 60 * 1000; 1210 1211 static final Set<String> HAS_DAYLIGHT; 1212 static { 1213 Set<String> hasDaylightTemp = new HashSet<>(); 1214 Date date = new Date(); 1215 main: for (String zoneId : sc.getCanonicalTimeZones()) { 1216 TimeZone zone = TimeZone.getTimeZone(zoneId); 1217 for (long time = START_TIME + MIN_DAYLIGHT_PERIOD; time < END_TIME; time += MIN_DAYLIGHT_PERIOD) { 1218 date.setTime(time); 1219 if (zone.inDaylightTime(date)) { 1220 hasDaylightTemp.add(zoneId); 1221 if (false && !zone.useDaylightTime()) { 1222 System.out.println(zoneId + "\tuseDaylightTime()==false, but \tinDaylightTime(/" + date 1223 + "/)==true"); 1224 } 1225 continue main; 1226 } 1227 } 1228 } 1229 HAS_DAYLIGHT = Collections.unmodifiableSet(hasDaylightTemp); 1230 } 1231 1232 static final Set<String> SINGULAR_COUNTRIES; 1233 1234 private static PrintWriter countFile; 1235 static { 1236 // start with certain special-case countries 1237 Set<String> singularCountries = new HashSet<>( 1238 Arrays.asList("CL EC ES NZ PT AQ FM GL KI UM PF".split(" "))); 1239 1240 Map<String, Set<String>> countryToZoneSet = sc.getCountryToZoneSet(); 1241 1242 main: for (Entry<String, Set<String>> countryZones : countryToZoneSet.entrySet()) { 1243 String country = countryZones.getKey(); 1244 if (country.equals("001")) { 1245 continue; 1246 } 1247 Set<String> zones = countryZones.getValue(); 1248 if (zones.size() == 1) { 1249 singularCountries.add(country); 1250 continue; 1251 } 1252 // make a set of sets 1253 List<TimeZone> initial = new ArrayList<>(); 1254 for (String s : zones) { TimeZone.getTimeZone(s)1255 initial.add(TimeZone.getTimeZone(s)); 1256 } 1257 // now cycle through the times and see if we find any differences 1258 for (long time = START_TIME; time < END_TIME; time += DELTA_TIME) { 1259 int firstOffset = Integer.MIN_VALUE; 1260 for (TimeZone zone : initial) { 1261 int offset = zone.getOffset(time); 1262 if (firstOffset == Integer.MIN_VALUE) { 1263 firstOffset = offset; 1264 } else { 1265 if (firstOffset != offset) { 1266 if (false) 1267 System.out.println(country 1268 + " Difference at: " + new Date(time) 1269 + ", " + zone.getDisplayName() + " " + (offset / 1000.0 / 60 / 60) 1270 + ", " + initial.iterator().next().getDisplayName() + " " 1271 + (firstOffset / 1000.0 / 60 / 60)); 1272 continue main; 1273 } 1274 } 1275 } 1276 } 1277 singularCountries.add(country); 1278 } 1279 SINGULAR_COUNTRIES = Collections.unmodifiableSet(singularCountries); 1280 } 1281 1282 static final class MetazoneInfo { 1283 1284 /** 1285 * @param metazoneId 1286 * @param singleCountry 1287 * @param hasDaylight 1288 * @param zonesForCountry 1289 * @param regionToZone 1290 */ MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight)1291 public MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight) { 1292 this.golden = golden; 1293 this.metazoneId = metazoneId; 1294 this.singleCountry = singleCountry; 1295 this.hasDaylight = hasDaylight; 1296 } 1297 1298 static final String[] GENERIC = { "/long/generic", 1299 // "/short/generic" 1300 }; 1301 static final String[] DAYLIGHT = { "/long/generic", "/long/standard", "/long/daylight", 1302 // "/short/generic", "/short/standard", "/short/daylight" 1303 }; 1304 getTypes()1305 public String[] getTypes() { 1306 return hasDaylight ? DAYLIGHT : GENERIC; 1307 } 1308 1309 private final String metazoneId; 1310 private final String golden; 1311 private final boolean singleCountry; 1312 private final boolean hasDaylight; 1313 1314 static final List<MetazoneInfo> METAZONE_LIST; 1315 static { 1316 // Set<String> zones = supplementalDataInfo.getCanonicalTimeZones(); 1317 ArrayList<MetazoneInfo> result = new ArrayList<>(); 1318 1319 Map<String, String> zoneToCountry = sc.getZoneToCounty(); 1320 1321 Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone(); 1322 for (String metazone : supplementalDataInfo.getAllMetazones()) { 1323 Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone); 1324 String golden = regionToZone.get("001"); 1325 if (golden == null) { 1326 throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone); 1327 } 1328 String region = zoneToCountry.get(golden); 1329 boolean isSingleCountry = SINGULAR_COUNTRIES.contains(region); 1330 if (isSingleCountry) { 1331 continue; 1332 } 1333 1334 // TimeZone goldenZone = TimeZone.getTimeZone(golden); 1335 1336 Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(golden); 1337 if (metazoneRanges == null) { 1338 throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone); 1339 } 1340 MetazoneInfo item = new MetazoneInfo(metazone, golden, isSingleCountry, HAS_DAYLIGHT.contains(golden)); 1341 result.add(item); 1342 } 1343 METAZONE_LIST = Collections.unmodifiableList(result); 1344 } 1345 1346 @Override toString()1347 public String toString() { 1348 return sc.getZoneToCounty().get(golden) 1349 + "\t" + metazoneId 1350 + "\t" + golden 1351 + "\t" + (singleCountry ? "singleCountry" : "") 1352 + "\t" + (hasDaylight ? "useDaylightTime" : "") 1353 // + ": " + zonesForCountry 1354 // + "\t" + regionToZone; 1355 ; 1356 } 1357 } 1358 showMetazoneInfo()1359 static void showMetazoneInfo() { 1360 System.out.println("\nZones in multiple metazones\n"); 1361 1362 for (String zone : sc.getCanonicalTimeZones()) { 1363 Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(zone); 1364 if (metazoneRanges == null) { 1365 System.out.println("Zone doesn't have metazone! " + zone); 1366 continue; 1367 } 1368 if (metazoneRanges.size() != 1) { 1369 for (MetaZoneRange range : metazoneRanges) { 1370 System.out.println(zone + ":\t" + range); 1371 } 1372 System.out.println(); 1373 } 1374 } 1375 1376 System.out.println("\nMetazoneInfo\n"); 1377 1378 for (boolean singleCountry : new boolean[] { false }) { 1379 for (boolean hasDaylight : new boolean[] { false, true }) { 1380 for (MetazoneInfo mzone : MetazoneInfo.METAZONE_LIST) { 1381 if (mzone.hasDaylight != hasDaylight) continue; 1382 if (mzone.singleCountry != singleCountry) continue; 1383 System.out.println(mzone); 1384 } 1385 } 1386 } 1387 } 1388 displayWsb(String file, EnglishInfo info)1389 private static void displayWsb(String file, EnglishInfo info) { 1390 try { 1391 String[] parts = file.split("/"); 1392 ULocale locale = new ULocale(parts[parts.length - 2]); 1393 FileInputStream fis = new FileInputStream(file); 1394 XMLReader xmlReader = XMLFileReader.createXMLReader(false); 1395 xmlReader.setErrorHandler(new MyErrorHandler()); 1396 Map<String, String> data = new TreeMap<>(); 1397 xmlReader.setContentHandler(new MyContentHandler(locale, data, info)); 1398 InputSource is = new InputSource(fis); 1399 is.setSystemId(file); 1400 xmlReader.parse(is); 1401 fis.close(); 1402 for (Entry<String, String> entity : data.entrySet()) { 1403 String path = entity.getKey(); 1404 String value = entity.getValue(); 1405 PathInfo pathInfo = info.getPathInfo(path); 1406 System.out.println(value + "\t" + (pathInfo == null ? "?" : pathInfo.englishValue) + "\t" + path); 1407 } 1408 } catch (SAXParseException e) { 1409 System.out.println("\t" + "Can't read " + file); 1410 System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); 1411 } catch (SAXException e) { 1412 System.out.println("\t" + "Can't read " + file); 1413 System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); 1414 } catch (IOException e) { 1415 System.out.println("\t" + "Can't read " + file); 1416 System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); 1417 } 1418 } 1419 1420 static class MyErrorHandler implements ErrorHandler { 1421 @Override error(SAXParseException exception)1422 public void error(SAXParseException exception) throws SAXException { 1423 System.out.println("\nerror: " + XMLFileReader.showSAX(exception)); 1424 throw exception; 1425 } 1426 1427 @Override fatalError(SAXParseException exception)1428 public void fatalError(SAXParseException exception) throws SAXException { 1429 System.out.println("\nfatalError: " + XMLFileReader.showSAX(exception)); 1430 throw exception; 1431 } 1432 1433 @Override warning(SAXParseException exception)1434 public void warning(SAXParseException exception) throws SAXException { 1435 System.out.println("\nwarning: " + XMLFileReader.showSAX(exception)); 1436 throw exception; 1437 } 1438 } 1439 1440 static class MyContentHandler implements ContentHandler { 1441 private static final boolean SHOW = false; 1442 private Map<String, String> myData; 1443 private EnglishInfo info; 1444 private PathInfo lastPathInfo; 1445 private StringBuilder currentText = new StringBuilder(); 1446 private long lastId; 1447 private String lastPluralTag; 1448 private Map<String, String> pluralTags = new LinkedHashMap<>(); 1449 private Set<String> pluralKeywords; 1450 MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info)1451 public MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info) { 1452 myData = data; 1453 this.info = info; 1454 PluralRules rules = PluralRules.forLocale(locale); 1455 pluralKeywords = Builder.with(new HashSet<String>()).addAll(rules.getKeywords()).add("0").add("1").freeze(); 1456 } 1457 1458 @Override characters(char[] arg0, int arg1, int arg2)1459 public void characters(char[] arg0, int arg1, int arg2) throws SAXException { 1460 String chars = String.valueOf(arg0, arg1, arg2); 1461 // if (SHOW) System.out.println("\t characters\t" + chars); 1462 currentText.append(chars); 1463 } 1464 1465 @Override endDocument()1466 public void endDocument() throws SAXException { 1467 if (SHOW) System.out.println("\t endDocument\t"); 1468 } 1469 1470 @Override endElement(String arg0, String arg1, String qName)1471 public void endElement(String arg0, String arg1, String qName) throws SAXException { 1472 // if (SHOW) System.out.println("\t endElement\t" + arg0 + "\t" + arg1 + "\t" + qName); 1473 if (qName.equals("msg")) { 1474 String chars = currentText.toString().replace("\n", "").trim(); 1475 if (lastPathInfo == null) { 1476 System.out.println("***Missing path info for " + lastId + "\t" + chars); 1477 // myData.put("*** Missing path: " + lastId, chars); 1478 } else if (pluralTags.size() != 0) { 1479 for (Entry<String, String> pluralTagEntry : pluralTags.entrySet()) { 1480 String pluralTag = pluralTagEntry.getKey(); 1481 String pluralTagValue = pluralTagEntry.getValue(); 1482 if (pluralKeywords.contains(pluralTag)) { 1483 String fixedCount = lastPathInfo.path.replace("other", pluralTag); 1484 myData.put(fixedCount, pluralTagValue); 1485 } else { 1486 System.out.println("***Skipping " + pluralTag + "\t" + pluralTagValue); 1487 } 1488 } 1489 // myData.put(lastPathInfo.path, pluralTags.toString()); 1490 pluralTags.clear(); 1491 } else { 1492 myData.put(lastPathInfo.path, chars); 1493 } 1494 currentText.setLength(0); 1495 } 1496 } 1497 1498 @Override endPrefixMapping(String arg0)1499 public void endPrefixMapping(String arg0) throws SAXException { 1500 if (SHOW) System.out.println("\t endPrefixMapping\t" + arg0); 1501 } 1502 1503 @Override ignorableWhitespace(char[] arg0, int arg1, int arg2)1504 public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException { 1505 if (SHOW) System.out.println("\t ignorableWhitespace\t" + String.valueOf(arg0, arg1, arg2)); 1506 } 1507 1508 @Override processingInstruction(String arg0, String arg1)1509 public void processingInstruction(String arg0, String arg1) throws SAXException { 1510 if (SHOW) System.out.println("\t processingInstruction\t" + arg0 + "\t" + arg1); 1511 } 1512 1513 @Override setDocumentLocator(Locator arg0)1514 public void setDocumentLocator(Locator arg0) { 1515 if (SHOW) System.out.println("\t setDocumentLocator\t" + arg0); 1516 } 1517 1518 @Override skippedEntity(String arg0)1519 public void skippedEntity(String arg0) throws SAXException { 1520 if (SHOW) System.out.println("\t skippedEntity\t" + arg0); 1521 } 1522 1523 @Override startDocument()1524 public void startDocument() throws SAXException { 1525 if (SHOW) System.out.println("\t startDocument\t"); 1526 } 1527 1528 @Override startElement(String arg0, String arg1, String qName, Attributes arg3)1529 public void startElement(String arg0, String arg1, String qName, Attributes arg3) throws SAXException { 1530 // if (SHOW) System.out.println("\t startElement\t" + arg0 + "\t" + arg1 + "\t" + qName + "\t" + 1531 // showAttributes(arg3)); 1532 if (qName.equals("msg")) { 1533 lastId = Long.parseLong(arg3.getValue("id")); 1534 lastPathInfo = info.getPathInfo(lastId); 1535 currentText.setLength(0); 1536 } else if (qName.equals("ph")) { 1537 String name = arg3.getValue("name"); 1538 String original = lastPathInfo.getPlaceholderReplacementsToOriginal().get(name); 1539 if (original != null) { 1540 currentText.append(original); 1541 } else if (name.startsWith("[PLURAL_")) { 1542 pluralTags.clear(); 1543 lastPluralTag = "[START_PLURAL]"; 1544 } else { 1545 String pluralTag = PLURAL_TAGS.get(name); 1546 if (pluralTag != null) { 1547 String chars = currentText.toString().replace("\n", "").trim(); 1548 pluralTags.put(lastPluralTag, chars); 1549 currentText.setLength(0); 1550 lastPluralTag = pluralTag; 1551 } else { 1552 System.out.println("***Can't find " + name + " in " 1553 + lastPathInfo.getPlaceholderReplacementsToOriginal()); 1554 } 1555 } 1556 } 1557 } 1558 showAttributes(Attributes atts)1559 private String showAttributes(Attributes atts) { 1560 String result = ""; 1561 for (int i = 0; i < atts.getLength(); ++i) { 1562 result += atts.getQName(i) + "=\"" + atts.getValue(i) + "\"\t"; 1563 } 1564 return result; 1565 } 1566 1567 @Override startPrefixMapping(String arg0, String arg1)1568 public void startPrefixMapping(String arg0, String arg1) throws SAXException { 1569 if (SHOW) System.out.println("\t startPrefixMapping\t" + arg0 + "\t" + arg1); 1570 } 1571 } 1572 1573 static final Map<String, String> PLURAL_TAGS = Builder.with(new HashMap<String, String>()) 1574 .put("[=0]", "0") 1575 .put("[=1]", "1") 1576 .put("[ZERO]", PluralRules.KEYWORD_ZERO) 1577 .put("[ONE]", PluralRules.KEYWORD_ONE) 1578 .put("[TWO]", PluralRules.KEYWORD_TWO) 1579 .put("[FEW]", PluralRules.KEYWORD_FEW) 1580 .put("[MANY]", PluralRules.KEYWORD_MANY) 1581 .put("[OTHER]", PluralRules.KEYWORD_OTHER) 1582 .put("[END_PLURAL]", "") 1583 .freeze(); 1584 1585 private static String compareDirectory; 1586 } 1587