1 package org.unicode.cldr.json; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.io.PrintWriter; 6 import java.text.ParseException; 7 import java.util.ArrayList; 8 import java.util.Collections; 9 import java.util.HashMap; 10 import java.util.Iterator; 11 import java.util.List; 12 import java.util.Map; 13 import java.util.Set; 14 import java.util.TreeMap; 15 import java.util.TreeSet; 16 import java.util.concurrent.atomic.AtomicInteger; 17 import java.util.regex.Matcher; 18 import java.util.regex.Pattern; 19 20 import org.unicode.cldr.draft.FileUtilities; 21 import org.unicode.cldr.draft.ScriptMetadata; 22 import org.unicode.cldr.draft.ScriptMetadata.Info; 23 import org.unicode.cldr.tool.Option.Options; 24 import org.unicode.cldr.util.Annotations; 25 import org.unicode.cldr.util.CLDRConfig; 26 import org.unicode.cldr.util.CLDRFile; 27 import org.unicode.cldr.util.CLDRFile.DraftStatus; 28 import org.unicode.cldr.util.CLDRPaths; 29 import org.unicode.cldr.util.CLDRTool; 30 import org.unicode.cldr.util.CldrUtility; 31 import org.unicode.cldr.util.CoverageInfo; 32 import org.unicode.cldr.util.DtdData; 33 import org.unicode.cldr.util.DtdType; 34 import org.unicode.cldr.util.Factory; 35 import org.unicode.cldr.util.FileProcessor; 36 import org.unicode.cldr.util.Level; 37 import org.unicode.cldr.util.LocaleIDParser; 38 import org.unicode.cldr.util.PatternCache; 39 import org.unicode.cldr.util.StandardCodes; 40 import org.unicode.cldr.util.SupplementalDataInfo; 41 import org.unicode.cldr.util.XPathParts; 42 43 import com.google.common.base.Joiner; 44 import com.google.gson.Gson; 45 import com.google.gson.GsonBuilder; 46 import com.google.gson.JsonArray; 47 import com.google.gson.JsonObject; 48 import com.google.gson.JsonPrimitive; 49 import com.google.gson.stream.JsonWriter; 50 51 /** 52 * Utility methods to extract data from CLDR repository and export it in JSON 53 * format. 54 * 55 * @author shanjian / emmons 56 */ 57 @CLDRTool(alias = "ldml2json", description = "Convert CLDR data to JSON") 58 public class Ldml2JsonConverter { 59 private static boolean DEBUG = false; 60 61 private enum RunType { 62 main, supplemental, segments, rbnf, annotations, annotationsDerived 63 } 64 65 private static final StandardCodes sc = StandardCodes.make(); 66 private Set<String> defaultContentLocales = SupplementalDataInfo.getInstance().getDefaultContentLocales(); 67 private Set<String> skippedDefaultContentLocales = new TreeSet<>(); 68 69 private class availableLocales { 70 Set<String> modern = new TreeSet<>(); 71 Set<String> full = new TreeSet<>(); 72 } 73 74 private availableLocales avl = new availableLocales(); 75 private Gson gson = new GsonBuilder().setPrettyPrinting().create(); 76 private static final Options options = new Options( 77 "Usage: LDML2JsonConverter [OPTIONS] [FILES]\n" + 78 "This program converts CLDR data to the JSON format.\n" + 79 "Please refer to the following options. \n" + 80 "\texample: org.unicode.cldr.json.Ldml2JsonConverter -c xxx -d yyy") 81 .add("commondir", 'c', ".*", CLDRPaths.COMMON_DIRECTORY, 82 "Common directory for CLDR files, defaults to CldrUtility.COMMON_DIRECTORY") 83 .add("destdir", 'd', ".*", CLDRPaths.GEN_DIRECTORY, 84 "Destination directory for output files, defaults to CldrUtility.GEN_DIRECTORY") 85 .add("match", 'm', ".*", ".*", 86 "Regular expression to define only specific locales or files to be generated") 87 .add("type", 't', "(main|supplemental|segments|rbnf|annotations|annotationsDerived)", "main", 88 "Type of CLDR data being generated, main, supplemental, or segments.") 89 .add("resolved", 'r', "(true|false)", "false", 90 "Whether the output JSON for the main directory should be based on resolved or unresolved data") 91 .add("draftstatus", 's', "(approved|contributed|provisional|unconfirmed)", "unconfirmed", 92 "The minimum draft status of the output data") 93 .add("coverage", 'l', "(minimal|basic|moderate|modern|comprehensive|optional)", "optional", 94 "The maximum coverage level of the output data") 95 .add("fullnumbers", 'n', "(true|false)", "false", 96 "Whether the output JSON should output data for all numbering systems, even those not used in the locale") 97 .add("other", 'o', "(true|false)", "false", 98 "Whether to write out the 'other' section, which contains any unmatched paths") 99 .add("packages", 'p', "(true|false)", "false", 100 "Whether to group data files into installable packages") 101 .add("identity", 'i', "(true|false)", "true", 102 "Whether to copy the identity info into all sections containing data") 103 .add("konfig", 'k', ".*", null, "LDML to JSON configuration file"); 104 main(String[] args)105 public static void main(String[] args) throws Exception { 106 options.parse(args, true); 107 108 Ldml2JsonConverter l2jc = new Ldml2JsonConverter( 109 options.get("commondir").getValue(), 110 options.get("destdir").getValue(), 111 options.get("type").getValue(), 112 Boolean.parseBoolean(options.get("fullnumbers").getValue()), 113 Boolean.parseBoolean(options.get("resolved").getValue()), 114 options.get("coverage").getValue(), 115 options.get("match").getValue(), 116 Boolean.parseBoolean(options.get("packages").getValue()), 117 options.get("konfig").getValue()); 118 119 long start = System.currentTimeMillis(); 120 DraftStatus status = DraftStatus.valueOf(options.get("draftstatus").getValue()); 121 l2jc.processDirectory(options.get("type").getValue(), status); 122 long end = System.currentTimeMillis(); 123 System.out.println("Finished in " + (end - start) + " ms"); 124 } 125 126 // The CLDR file directory where those official XML files will be found. 127 private String cldrCommonDir; 128 // Where the generated JSON files will be stored. 129 private String outputDir; 130 // Whether data in main should output all numbering systems, even those not in use in the locale. 131 private boolean fullNumbers; 132 // Whether data in main should be resolved for output. 133 private boolean resolve; 134 // Used to match specific locales for output 135 private String match; 136 // Used to filter based on coverage 137 private int coverageValue; 138 // Whether we should write output files into installable packages 139 private boolean writePackages; 140 // Type of run for this converter: main, supplemental, or segments 141 private RunType type; 142 143 private class JSONSection implements Comparable<JSONSection> { 144 public String section; 145 public Pattern pattern; 146 public String packageName; 147 148 @Override compareTo(JSONSection other)149 public int compareTo(JSONSection other) { 150 return section.compareTo(other.section); 151 } 152 153 } 154 155 private Map<String, String> dependencies; 156 private List<JSONSection> sections; 157 private Set<String> packages; 158 Ldml2JsonConverter(String cldrDir, String outputDir, String runType, boolean fullNumbers, boolean resolve, String coverage, String match, boolean writePackages, String configFile)159 public Ldml2JsonConverter(String cldrDir, String outputDir, String runType, boolean fullNumbers, boolean resolve, String coverage, String match, 160 boolean writePackages, String configFile) { 161 this.cldrCommonDir = cldrDir; 162 this.outputDir = outputDir; 163 this.type = RunType.valueOf(runType); 164 this.fullNumbers = fullNumbers; 165 this.resolve = resolve; 166 this.match = match; 167 this.writePackages = writePackages; 168 this.coverageValue = Level.get(coverage).getLevel(); 169 170 sections = new ArrayList<>(); 171 packages = new TreeSet<>(); 172 dependencies = new HashMap<>(); 173 174 FileProcessor myReader = new FileProcessor() { 175 @Override 176 protected boolean handleLine(int lineCount, String line) { 177 String[] lineParts = line.trim().split("\\s*;\\s*"); 178 String key, value, section = null, path = null, packageName = null, dependency = null; 179 boolean hasSection = false; 180 boolean hasPath = false; 181 boolean hasPackage = false; 182 boolean hasDependency = false; 183 for (String linePart : lineParts) { 184 int pos = linePart.indexOf('='); 185 if (pos < 0) { 186 throw new IllegalArgumentException(); 187 } 188 key = linePart.substring(0, pos); 189 value = linePart.substring(pos + 1); 190 if (key.equals("section")) { 191 hasSection = true; 192 section = value; 193 } else if (key.equals("path")) { 194 hasPath = true; 195 path = value; 196 } else if (key.equals("package")) { 197 hasPackage = true; 198 packageName = value; 199 } else if (key.equals("dependency")) { 200 hasDependency = true; 201 dependency = value; 202 } 203 } 204 if (hasSection && hasPath) { 205 JSONSection j = new JSONSection(); 206 j.section = section; 207 j.pattern = PatternCache.get(path); 208 if (hasPackage) { 209 j.packageName = packageName; 210 } 211 sections.add(j); 212 } 213 if (hasDependency && hasPackage) { 214 dependencies.put(packageName, dependency); 215 } 216 return true; 217 } 218 }; 219 220 if (configFile != null) { 221 myReader.process(configFile); 222 } else { 223 switch (type) { 224 case main: 225 myReader.process(Ldml2JsonConverter.class, "JSON_config.txt"); 226 break; 227 case supplemental: 228 myReader.process(Ldml2JsonConverter.class, "JSON_config_supplemental.txt"); 229 break; 230 case segments: 231 myReader.process(Ldml2JsonConverter.class, "JSON_config_segments.txt"); 232 break; 233 case rbnf: 234 myReader.process(Ldml2JsonConverter.class, "JSON_config_rbnf.txt"); 235 break; 236 default: 237 myReader.process(Ldml2JsonConverter.class, "JSON_config_"+type.name()+".txt"); 238 } 239 } 240 241 // Add a section at the end of the list that will match anything not already matched. 242 JSONSection j = new JSONSection(); 243 j.section = "other"; 244 j.pattern = PatternCache.get(".*"); 245 sections.add(j); 246 247 } 248 249 /** 250 * @see XPathParts#addInternal 251 */ 252 static final Pattern ANNOTATION_CP_REMAP = PatternCache.get("^(.*)\\[@cp=\"(\\[|\\]|'|\"|@|/|=)\"\\](.*)$"); 253 /** 254 * Transform the path by applying PATH_TRANSFORMATIONS rules. 255 * 256 * @param pathStr 257 * The path string being transformed. 258 * @return The transformed path. 259 */ transformPath(final String pathStr, final String pathPrefix)260 private String transformPath(final String pathStr, final String pathPrefix) { 261 String result = pathStr; 262 263 // handle annotation cp value 264 Matcher cpm = ANNOTATION_CP_REMAP.matcher(result); 265 if( cpm.matches() ) { 266 // We need to avoid breaking the syntax not just of JSON, but of XPATH. 267 final String badCodepointRange = cpm.group(2); 268 StringBuilder sb = new StringBuilder(cpm.group(1)) 269 .append("[@cp=\""); 270 // JSON would handle a wide range of things if escaped, but XPATH will not. 271 if(badCodepointRange.codePointCount(0, badCodepointRange.length()) != 1) { 272 // forbid more than one U+ (because we will have to unescape it.) 273 throw new IllegalArgumentException("Need exactly one codepoint in the @cp string, but got " + badCodepointRange + " in xpath " + pathStr); 274 } 275 badCodepointRange.codePoints().forEach(cp -> sb.append("U+").append(Integer.toHexString(cp).toUpperCase())); 276 sb.append("\"]").append(cpm.group(3)); 277 result = sb.toString(); 278 } 279 280 if (DEBUG) { 281 System.out.println(" IN pathStr : " + result); 282 } 283 Matcher m; 284 for (int i = 0; i < LdmlConvertRules.PATH_TRANSFORMATIONS.length; i++) { 285 m = LdmlConvertRules.PATH_TRANSFORMATIONS[i].pattern.matcher(result); 286 if (m.matches()) { 287 if (DEBUG) { 288 System.out.println(LdmlConvertRules.PATH_TRANSFORMATIONS[i].pattern); 289 } 290 result = m.replaceFirst(LdmlConvertRules.PATH_TRANSFORMATIONS[i].replacement); 291 break; 292 } 293 } 294 result = result.replaceFirst("/ldml/", pathPrefix); 295 result = result.replaceFirst("/supplementalData/", pathPrefix); 296 297 if (result.contains("languages") || 298 result.contains("languageAlias") || 299 result.contains("languageMatches") || 300 result.contains("likelySubtags") || 301 result.contains("parentLocale") || 302 result.contains("locales=")) { 303 result = result.replaceAll("_", "-"); 304 } 305 if (DEBUG) { 306 System.out.println("OUT pathStr : " + result); 307 } 308 309 if (DEBUG) { 310 System.out.println("result: " + result); 311 } 312 return result; 313 } 314 mapPathsToSections(AtomicInteger readCount, int totalCount, CLDRFile file, String pathPrefix, SupplementalDataInfo sdi)315 private Map<JSONSection, List<CldrItem>> mapPathsToSections(AtomicInteger readCount, int totalCount, 316 CLDRFile file, String pathPrefix, SupplementalDataInfo sdi) 317 throws IOException, ParseException { 318 final Map<JSONSection, List<CldrItem>> sectionItems = new TreeMap<>(); 319 320 String locID = file.getLocaleID(); 321 Matcher noNumberingSystemMatcher = LdmlConvertRules.NO_NUMBERING_SYSTEM_PATTERN.matcher(""); 322 Matcher numberingSystemMatcher = LdmlConvertRules.NUMBERING_SYSTEM_PATTERN.matcher(""); 323 Matcher rootIdentityMatcher = LdmlConvertRules.ROOT_IDENTITY_PATTERN.matcher(""); 324 Set<String> activeNumberingSystems = new TreeSet<>(); 325 activeNumberingSystems.add("latn"); // Always include latin script numbers 326 for (String np : LdmlConvertRules.ACTIVE_NUMBERING_SYSTEM_XPATHS) { 327 String ns = file.getWinningValue(np); 328 if (ns != null && ns.length() > 0) { 329 activeNumberingSystems.add(ns); 330 } 331 } 332 DtdType fileDtdType; 333 if (CLDRFile.isSupplementalName(locID)) { 334 fileDtdType = DtdType.supplementalData; 335 } else { 336 fileDtdType = DtdType.ldml; 337 } 338 CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo(); 339 for (Iterator<String> it = file.iterator("", DtdData.getInstance(fileDtdType).getDtdComparator(null)); it.hasNext();) { 340 int cv = Level.UNDETERMINED.getLevel(); 341 final String path = it.next(); 342 String fullPath = file.getFullXPath(path); 343 String value = file.getWinningValue(path); 344 if (path.startsWith("//ldml/localeDisplayNames/languages") && 345 file.getSourceLocaleID(path, null).equals("code-fallback")) { 346 value = file.getConstructedBaileyValue(path, null, null); 347 } 348 349 if (fullPath == null) { 350 fullPath = path; 351 } 352 353 if (!CLDRFile.isSupplementalName(locID) && path.startsWith("//ldml/") && !path.contains("/identity")) { 354 cv = covInfo.getCoverageValue(path, locID); 355 } 356 if (cv > coverageValue) { 357 continue; 358 } 359 // Discard root identity element unless the locale is root 360 rootIdentityMatcher.reset(fullPath); 361 if (rootIdentityMatcher.matches() && !"root".equals(locID)) { 362 continue; 363 } 364 365 // automatically filter out number symbols and formats without a numbering system 366 noNumberingSystemMatcher.reset(fullPath); 367 if (noNumberingSystemMatcher.matches()) { 368 continue; 369 } 370 371 // Filter out non-active numbering systems data unless fullNumbers is specified. 372 numberingSystemMatcher.reset(fullPath); 373 if (numberingSystemMatcher.matches() && !fullNumbers) { 374 XPathParts xpp = XPathParts.getFrozenInstance(fullPath); 375 String currentNS = xpp.getAttributeValue(2, "numberSystem"); 376 if (currentNS != null && !activeNumberingSystems.contains(currentNS)) { 377 continue; 378 } 379 } 380 381 // Handle the no inheritance marker. 382 if (resolve && CldrUtility.NO_INHERITANCE_MARKER.equals(value)) { 383 continue; 384 } 385 386 String transformedPath = transformPath(path, pathPrefix); 387 String transformedFullPath = transformPath(fullPath, pathPrefix); 388 389 if(transformedPath.isEmpty()) { 390 continue; // skip this path 391 } 392 393 for (JSONSection js : sections) { 394 if (js.pattern.matcher(transformedPath).matches()) { 395 CldrItem item = new CldrItem(transformedPath, transformedFullPath, path, fullPath, value); 396 397 List<CldrItem> cldrItems = sectionItems.get(js); 398 if (cldrItems == null) { 399 cldrItems = new ArrayList<>(); 400 } 401 cldrItems.add(item); 402 sectionItems.put(js, cldrItems); 403 break; 404 } 405 } 406 } 407 408 Matcher versionInfoMatcher = PatternCache.get(".*/(identity|version).*").matcher(""); 409 // Automatically copy the version info to any sections that had real data in them. 410 JSONSection otherSection = sections.get(sections.size() - 1); 411 List<CldrItem> others = sectionItems.get(otherSection); 412 if (others == null) { 413 return sectionItems; 414 } 415 List<CldrItem> otherSectionItems = new ArrayList<>(others); 416 int addedItemCount = 0; 417 boolean copyIdentityInfo = Boolean.parseBoolean(options.get("identity").getValue()); 418 419 for (CldrItem item : otherSectionItems) { 420 String thisPath = item.getPath(); 421 versionInfoMatcher.reset(thisPath); 422 if (versionInfoMatcher.matches()) { 423 for (JSONSection js : sections) { 424 if (sectionItems.get(js) != null && !js.section.equals("other") && copyIdentityInfo) { 425 List<CldrItem> hit = sectionItems.get(js); 426 hit.add(addedItemCount, item); 427 sectionItems.put(js, hit); 428 } 429 if (js.section.equals("other")) { 430 List<CldrItem> hit = sectionItems.get(js); 431 hit.remove(item); 432 sectionItems.put(js, hit); 433 } 434 } 435 addedItemCount++; 436 } 437 } 438 return sectionItems; 439 } 440 441 /** 442 * Convert CLDR's XML data to JSON format. 443 * 444 * @param file 445 * CLDRFile object. 446 * @param outFilename 447 * The file name used to save JSON data. 448 * @throws IOException 449 * @throws ParseException 450 */ convertCldrItems(AtomicInteger readCount, int totalCount, String dirName, String filename, String pathPrefix, final Map<JSONSection, List<CldrItem>> sectionItems)451 private void convertCldrItems(AtomicInteger readCount, int totalCount, 452 String dirName, String filename, String pathPrefix, 453 final Map<JSONSection, List<CldrItem>> sectionItems) 454 throws IOException, ParseException { 455 // zone and timezone items are queued for sorting first before they are 456 // processed. 457 458 for (JSONSection js : sections) { 459 String outFilename; 460 if (type == RunType.rbnf) { 461 outFilename = filename.replaceAll("_", "-") + ".json"; 462 } else { 463 outFilename = js.section + ".json"; 464 } 465 String tier = ""; 466 boolean writeOther = Boolean.parseBoolean(options.get("other").getValue()); 467 if (js.section.equals("other") && !writeOther) { 468 continue; 469 } else { 470 StringBuilder outputDirname = new StringBuilder(outputDir); 471 if (writePackages) { 472 if (type != RunType.supplemental && type != RunType.rbnf) { 473 LocaleIDParser lp = new LocaleIDParser(); 474 lp.set(filename); 475 if (defaultContentLocales.contains(filename) && 476 lp.getRegion().length() > 0) { 477 if (type == RunType.main) { 478 skippedDefaultContentLocales.add(filename.replaceAll("_", "-")); 479 } 480 continue; 481 } 482 Level localeCoverageLevel = sc.getLocaleCoverageLevel("Cldr", filename); 483 if (localeCoverageLevel == Level.MODERN || filename.equals("root")) { 484 tier = "-modern"; 485 if (type == RunType.main) { 486 avl.modern.add(filename.replaceAll("_", "-")); 487 } 488 } else { 489 tier = "-full"; 490 } 491 if (type == RunType.main) { 492 avl.full.add(filename.replaceAll("_", "-")); 493 } 494 } else if (type == RunType.rbnf) { 495 js.packageName = "rbnf"; 496 tier = ""; 497 } 498 if (js.packageName != null) { 499 String packageName = "cldr-" + js.packageName + tier; 500 outputDirname.append("/" + packageName); 501 packages.add(packageName); 502 } 503 outputDirname.append("/" + dirName + "/"); 504 if (type != RunType.supplemental && type != RunType.rbnf) { 505 outputDirname.append(filename.replaceAll("_", "-")); 506 } 507 if (DEBUG) { 508 System.out.println("outDir: " + outputDirname); 509 System.out.println("pack: " + js.packageName); 510 System.out.println("dir: " + dirName); 511 } 512 } else { 513 outputDirname.append("/" + filename); 514 } 515 516 File dir = new File(outputDirname.toString()); 517 if (!dir.exists()) { 518 dir.mkdirs(); 519 } 520 521 List<String> outputDirs = new ArrayList<>(); 522 outputDirs.add(outputDirname.toString()); 523 if (writePackages && type == RunType.main && tier.equals("-modern")) { 524 outputDirs.add(outputDirname.toString().replaceFirst("-modern", "-full")); 525 } 526 527 for (String outputDir : outputDirs) { 528 List<CldrItem> theItems = sectionItems.get(js); 529 if (theItems == null || theItems.size() == 0) { 530 System.out.println(">"+progressPrefix(readCount, totalCount) + 531 outputDir + " - no items to write"); 532 continue; 533 } 534 System.out.println("?"+progressPrefix(readCount, totalCount) + outputDir + " - " + theItems.size() + " item(s) to write."); 535 PrintWriter outf = FileUtilities.openUTF8Writer(outputDir, outFilename); 536 JsonWriter out = new JsonWriter(outf); 537 out.setIndent(" "); 538 539 ArrayList<CldrItem> sortingItems = new ArrayList<>(); 540 ArrayList<CldrItem> arrayItems = new ArrayList<>(); 541 542 ArrayList<CldrNode> nodesForLastItem = new ArrayList<>(); 543 String lastLeadingArrayItemPath = null; 544 String leadingArrayItemPath = ""; 545 int valueCount = 0; 546 String previousIdentityPath = null; 547 for (CldrItem item : theItems) { 548 if(item.getPath().isEmpty()) { 549 throw new IllegalArgumentException("empty xpath in " + filename + " section " + js.packageName+"/"+js.section); 550 } 551 if (type == RunType.rbnf) { 552 item.setValue(item.getValue().replace('→', '>')); 553 item.setValue(item.getValue().replace('←', '<')); 554 if (item.getFullPath().contains("@value")) { 555 int indexStart = item.getFullPath().indexOf("@value") + 8; 556 int indexEnd = item.getFullPath().indexOf("]", indexStart) - 1; 557 if (indexStart >= 0 && indexEnd >= 0 && indexEnd > indexStart) { 558 String sub = item.getFullPath().substring(indexStart, indexEnd); 559 /* System.out.println("sub: " + sub); 560 System.out.println("full: " + item.getFullPath()); 561 System.out.println("val: " + item.getValue());*/ 562 item.setFullPath(item.getFullPath().replace(sub, item.getValue())); 563 item.setFullPath(item.getFullPath().replaceAll("@value", "@" + sub)); 564 //System.out.println("modifyfull: " + item.getFullPath()); 565 item.setValue(""); 566 } 567 } 568 569 } 570 // ADJUST ACCESS=PRIVATE/PUBLIC BASED ON ICU RULE -- START 571 if (type == RunType.rbnf) { 572 String fullpath = item.getFullPath(); 573 if (fullpath.contains("/ruleset")) { 574 int ruleStartIndex = fullpath.indexOf("/ruleset["); 575 String checkString = fullpath.substring(ruleStartIndex); 576 577 int ruleEndIndex = 0; 578 if (checkString.contains("/")) { 579 ruleEndIndex = fullpath.indexOf("/", ruleStartIndex + 1); 580 } 581 if (ruleEndIndex > ruleStartIndex) { 582 String oldRulePath = fullpath.substring(ruleStartIndex, ruleEndIndex); 583 584 String newRulePath = oldRulePath; 585 if (newRulePath.contains("@type")) { 586 int typeIndexStart = newRulePath.indexOf("\"", newRulePath.indexOf("@type")); 587 int typeIndexEnd = newRulePath.indexOf("\"", typeIndexStart + 1); 588 String type = newRulePath.substring(typeIndexStart + 1, typeIndexEnd); 589 590 String newType = ""; 591 if (newRulePath.contains("@access")) { 592 newType = "%%" + type; 593 } else { 594 newType = "%" + type; 595 } 596 newRulePath = newRulePath.replace(type, newType); 597 item.setPath(item.getPath().replace(type, newType)); 598 } 599 fullpath = fullpath.replace(oldRulePath, newRulePath); 600 item.setFullPath(fullpath); 601 602 } 603 } 604 } 605 // ADJUST ACCESS=PRIVATE/PUBLIC BASED ON ICU RULE -- END 606 607 // items in the identity section of a file should only ever contain the lowest level, even if using 608 // resolving source, so if we have duplicates ( caused by attributes used as a value ) then suppress 609 // them here. 610 if (item.getPath().contains("/identity/")) { 611 String[] parts = item.getPath().split("\\["); 612 if (parts[0].equals(previousIdentityPath)) { 613 continue; 614 } else { 615 XPathParts xpp = XPathParts.getFrozenInstance(item.getPath()); 616 String territory = xpp.findAttributeValue("territory", "type"); 617 LocaleIDParser lp = new LocaleIDParser().set(filename); 618 if (territory != null && territory.length() > 0 && !territory.equals(lp.getRegion())) { 619 continue; 620 } 621 previousIdentityPath = parts[0]; 622 } 623 } 624 625 // some items need to be split to multiple item before processing. None 626 // of those items need to be sorted. 627 CldrItem[] items = item.split(); 628 if (items == null) { 629 items = new CldrItem[1]; 630 items[0] = item; 631 } 632 valueCount += items.length; 633 634 for (CldrItem newItem : items) { 635 // alias will be dropped in conversion, don't count it. 636 if (newItem.isAliasItem()) { 637 valueCount--; 638 } 639 640 // Items like zone items need to be sorted first before write them out. 641 if (newItem.needsSort()) { 642 resolveArrayItems(out, nodesForLastItem, arrayItems); 643 sortingItems.add(newItem); 644 } else { 645 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher( 646 newItem.getPath()); 647 if (matcher.matches()) { 648 resolveSortingItems(out, nodesForLastItem, sortingItems); 649 leadingArrayItemPath = matcher.group(1); 650 if (lastLeadingArrayItemPath != null && 651 !lastLeadingArrayItemPath.equals(leadingArrayItemPath)) { 652 resolveArrayItems(out, nodesForLastItem, arrayItems); 653 } 654 lastLeadingArrayItemPath = leadingArrayItemPath; 655 arrayItems.add(newItem); 656 } else { 657 resolveSortingItems(out, nodesForLastItem, sortingItems); 658 resolveArrayItems(out, nodesForLastItem, arrayItems); 659 outputCldrItem(out, nodesForLastItem, newItem); 660 lastLeadingArrayItemPath = ""; 661 } 662 } 663 } 664 } 665 666 resolveSortingItems(out, nodesForLastItem, sortingItems); 667 resolveArrayItems(out, nodesForLastItem, arrayItems); 668 System.out.println(">"+progressPrefix(readCount, totalCount) + String.format(".../%s/%s\t= %d values", 669 dir.getPath().substring(this.outputDir.length()+1), outFilename, valueCount)); 670 closeNodes(out, nodesForLastItem.size() - 2, 0); 671 outf.println(); 672 out.close(); 673 } 674 } 675 } 676 } 677 678 /** 679 * Creates the packaging files ( i.e. package.json ) for a particular package 680 * 681 * @param packageName 682 * The name of the installable package 683 */ writePackagingFiles(String outputDir, String packageName)684 public void writePackagingFiles(String outputDir, String packageName) throws IOException { 685 writePackageJson(outputDir, packageName); 686 writeBowerJson(outputDir, packageName); 687 } 688 writeBasicInfo(JsonObject obj, String packageName, boolean isNPM)689 public void writeBasicInfo(JsonObject obj, String packageName, boolean isNPM) { 690 691 obj.addProperty("name", packageName); 692 String versionString = CLDRFile.GEN_VERSION; 693 while (versionString.split("\\.").length < 3) { 694 versionString = versionString + ".0"; 695 } 696 obj.addProperty("version", versionString); 697 698 String[] packageNameParts = packageName.split("-"); 699 String dependency = dependencies.get(packageNameParts[1]); 700 if (dependency != null) { 701 String[] dependentPackageNames = new String[1]; 702 String tier = packageNameParts[packageNameParts.length - 1]; 703 if (dependency.equals("core")) { 704 dependentPackageNames[0] = "cldr-core"; 705 } else { 706 dependentPackageNames[0] = "cldr-" + dependency + "-" + tier; 707 } 708 709 JsonObject dependencies = new JsonObject(); 710 for (String dependentPackageName : dependentPackageNames) { 711 if (dependentPackageName != null) { 712 dependencies.addProperty(dependentPackageName, versionString); 713 } 714 } 715 obj.add(isNPM ? "peerDependencies" : "dependencies", dependencies); 716 } 717 } 718 writePackageJson(String outputDir, String packageName)719 public void writePackageJson(String outputDir, String packageName) throws IOException { 720 PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "package.json"); 721 System.out.println("Creating packaging file => " + outputDir + File.separator + packageName + File.separator + "package.json"); 722 JsonObject obj = new JsonObject(); 723 writeBasicInfo(obj, packageName, true); 724 725 JsonArray maintainers = new JsonArray(); 726 JsonObject primaryMaintainer = new JsonObject(); 727 728 obj.addProperty("homepage", "http://cldr.unicode.org"); 729 obj.addProperty("author", "The Unicode Consortium"); 730 731 primaryMaintainer.addProperty("name", "John Emmons"); 732 primaryMaintainer.addProperty("email", "emmo@us.ibm.com"); 733 primaryMaintainer.addProperty("url", "https://github.com/JCEmmons"); 734 maintainers.add(primaryMaintainer); 735 obj.add("maintainers", maintainers); 736 737 JsonObject repository = new JsonObject(); 738 repository.addProperty("type", "git"); 739 repository.addProperty("url", "git://github.com/unicode-cldr/" + packageName + ".git"); 740 obj.add("repository", repository); 741 742 obj.addProperty("license", "Unicode-DFS-2016"); 743 744 obj.addProperty("bugs", "https://unicode-org.atlassian.net/projects/CLDR/issues"); 745 746 outf.println(gson.toJson(obj)); 747 outf.close(); 748 } 749 writeBowerJson(String outputDir, String packageName)750 public void writeBowerJson(String outputDir, String packageName) throws IOException { 751 PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "bower.json"); 752 System.out.println("Creating packaging file => " + outputDir + File.separator + packageName + File.separator + "bower.json"); 753 JsonObject obj = new JsonObject(); 754 writeBasicInfo(obj, packageName, false); 755 if (type == RunType.supplemental) { 756 JsonArray mainPaths = new JsonArray(); 757 mainPaths.add(new JsonPrimitive("availableLocales.json")); 758 mainPaths.add(new JsonPrimitive("defaultContent.json")); 759 mainPaths.add(new JsonPrimitive("scriptMetadata.json")); 760 mainPaths.add(new JsonPrimitive(type.toString() + "/*.json")); 761 obj.add("main", mainPaths); 762 } else if (type == RunType.rbnf) { 763 obj.addProperty("main", type.toString() + "/*.json"); 764 } else { 765 obj.addProperty("main", type.toString() + "/**/*.json"); 766 } 767 768 JsonArray ignorePaths = new JsonArray(); 769 ignorePaths.add(new JsonPrimitive(".gitattributes")); 770 ignorePaths.add(new JsonPrimitive("README.md")); 771 obj.add("ignore", ignorePaths); 772 773 outf.println(gson.toJson(obj)); 774 outf.close(); 775 } 776 writeDefaultContent(String outputDir)777 public void writeDefaultContent(String outputDir) throws IOException { 778 PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "defaultContent.json"); 779 System.out.println("Creating packaging file => " + outputDir + "cldr-core" + File.separator + "defaultContent.json"); 780 JsonObject obj = new JsonObject(); 781 obj.add("defaultContent", gson.toJsonTree(skippedDefaultContentLocales)); 782 outf.println(gson.toJson(obj)); 783 outf.close(); 784 } 785 writeAvailableLocales(String outputDir)786 public void writeAvailableLocales(String outputDir) throws IOException { 787 PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "availableLocales.json"); 788 System.out.println("Creating packaging file => " + outputDir + "cldr-core" + File.separator + "availableLocales.json"); 789 JsonObject obj = new JsonObject(); 790 obj.add("availableLocales", gson.toJsonTree(avl)); 791 outf.println(gson.toJson(obj)); 792 outf.close(); 793 } 794 writeScriptMetadata(String outputDir)795 public void writeScriptMetadata(String outputDir) throws IOException { 796 PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "scriptMetadata.json"); 797 System.out.println("Creating script metadata file => " + outputDir + File.separator + "cldr-core" + File.separator + "scriptMetadata.json"); 798 Map<String, Info> scriptInfo = new TreeMap<>(); 799 for (String script : ScriptMetadata.getScripts()) { 800 Info i = ScriptMetadata.getInfo(script); 801 scriptInfo.put(script, i); 802 } 803 if (ScriptMetadata.errors.size() > 0) { 804 System.err.println(Joiner.on("\n\t").join(ScriptMetadata.errors)); 805 //throw new IllegalArgumentException(); 806 } 807 808 JsonObject obj = new JsonObject(); 809 obj.add("scriptMetadata", gson.toJsonTree(scriptInfo)); 810 outf.println(gson.toJson(obj)); 811 outf.close(); 812 } 813 814 /** 815 * Process the pending sorting items. 816 * 817 * @param out 818 * The ArrayList to hold all output lines. 819 * @param nodesForLastItem 820 * All the nodes from last item. 821 * @param sortingItems 822 * The item list that should be sorted before output. 823 * @throws IOException 824 * @throws ParseException 825 */ resolveSortingItems(JsonWriter out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems)826 private void resolveSortingItems(JsonWriter out, 827 ArrayList<CldrNode> nodesForLastItem, 828 ArrayList<CldrItem> sortingItems) 829 throws IOException, ParseException { 830 ArrayList<CldrItem> arrayItems = new ArrayList<>(); 831 String lastLeadingArrayItemPath = null; 832 833 if (!sortingItems.isEmpty()) { 834 Collections.sort(sortingItems); 835 for (CldrItem item : sortingItems) { 836 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher( 837 item.getPath()); 838 if (matcher.matches()) { 839 String leadingArrayItemPath = matcher.group(1); 840 if (lastLeadingArrayItemPath != null && 841 !lastLeadingArrayItemPath.equals(leadingArrayItemPath)) { 842 resolveArrayItems(out, nodesForLastItem, arrayItems); 843 } 844 lastLeadingArrayItemPath = leadingArrayItemPath; 845 arrayItems.add(item); 846 } else { 847 outputCldrItem(out, nodesForLastItem, item); 848 } 849 } 850 sortingItems.clear(); 851 resolveArrayItems(out, nodesForLastItem, arrayItems); 852 } 853 } 854 855 /** 856 * Process the pending array items. 857 * 858 * @param out 859 * The ArrayList to hold all output lines. 860 * @param nodesForLastItem 861 * All the nodes from last item. 862 * @param arrayItems 863 * The item list that should be output as array. 864 * @throws IOException 865 * @throws ParseException 866 */ resolveArrayItems(JsonWriter out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems)867 private void resolveArrayItems(JsonWriter out, 868 ArrayList<CldrNode> nodesForLastItem, 869 ArrayList<CldrItem> arrayItems) 870 throws IOException, ParseException { 871 boolean rbnfFlag = false; 872 if (!arrayItems.isEmpty()) { 873 CldrItem firstItem = arrayItems.get(0); 874 if (firstItem.needsSort()) { 875 Collections.sort(arrayItems); 876 firstItem = arrayItems.get(0); 877 } 878 879 int arrayLevel = getArrayIndentLevel(firstItem); 880 881 outputStartArray(out, nodesForLastItem, firstItem, arrayLevel); 882 883 // Previous statement closed for first element, trim nodesForLastItem 884 // so that it will not happen again inside. 885 int len = nodesForLastItem.size(); 886 while (len > arrayLevel) { 887 nodesForLastItem.remove(len - 1); 888 len--; 889 } 890 if (arrayItems.get(0).getFullPath().contains("rbnfrule")) { 891 rbnfFlag = true; 892 out.beginObject(); 893 } 894 for (CldrItem insideItem : arrayItems) { 895 896 outputArrayItem(out, insideItem, nodesForLastItem, arrayLevel); 897 898 } 899 if (rbnfFlag) { 900 out.endObject(); 901 } 902 903 arrayItems.clear(); 904 905 int lastLevel = nodesForLastItem.size() - 1; 906 closeNodes(out, lastLevel, arrayLevel); 907 if (!rbnfFlag) { 908 out.endArray(); 909 } 910 for (int i = arrayLevel - 1; i < lastLevel; i++) { 911 nodesForLastItem.remove(i); 912 } 913 } 914 } 915 916 /** 917 * Find the indent level on which array should be inserted. 918 * 919 * @param item 920 * The CldrItem being examined. 921 * @return The array indent level. 922 * @throws ParseException 923 */ getArrayIndentLevel(CldrItem item)924 private int getArrayIndentLevel(CldrItem item) throws ParseException { 925 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher( 926 item.getPath()); 927 if (!matcher.matches()) { 928 System.out.println("No match found for " + item.getPath() + ", this shouldn't happen."); 929 return 0; 930 } 931 932 String leadingPath = matcher.group(1); 933 CldrItem fakeItem = new CldrItem(leadingPath, leadingPath, leadingPath, leadingPath, ""); 934 return fakeItem.getNodesInPath().size() - 1; 935 } 936 937 /** 938 * Write the start of an array. 939 * 940 * @param out 941 * The ArrayList to hold all output lines. 942 * @param nodesForLastItem 943 * Nodes in path for last CldrItem. 944 * @param item 945 * The CldrItem to be processed. 946 * @param arrayLevel 947 * The level on which array is laid out. 948 * @throws IOException 949 * @throws ParseException 950 */ outputStartArray(JsonWriter out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)951 private void outputStartArray(JsonWriter out, 952 ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel) 953 throws IOException, ParseException { 954 955 ArrayList<CldrNode> nodesInPath = item.getNodesInPath(); 956 957 int i = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath); 958 959 // close previous nodes 960 closeNodes(out, nodesForLastItem.size() - 2, i); 961 962 for (; i < arrayLevel - 1; i++) { 963 startNonleafNode(out, nodesInPath.get(i), i); 964 } 965 966 String objName = nodesInPath.get(i).getNodeKeyName(); 967 out.name(objName); 968 if (!item.getFullPath().contains("rbnfrule")) { 969 out.beginArray(); 970 } 971 } 972 973 /** 974 * Write a CLDR item to file. 975 * 976 * "usesMetazone" will be checked to see if it is current. Those non-current 977 * item will be dropped. 978 * 979 * @param out 980 * The ArrayList to hold all output lines. 981 * @param nodesForLastItem 982 * @param item 983 * The CldrItem to be processed. 984 * @throws IOException 985 * @throws ParseException 986 */ outputCldrItem(JsonWriter out, ArrayList<CldrNode> nodesForLastItem, CldrItem item)987 private void outputCldrItem(JsonWriter out, 988 ArrayList<CldrNode> nodesForLastItem, CldrItem item) 989 throws IOException, ParseException { 990 // alias has been resolved, no need to keep it. 991 if (item.isAliasItem()) { 992 return; 993 } 994 995 ArrayList<CldrNode> nodesInPath = item.getNodesInPath(); 996 int arraySize = nodesInPath.size(); 997 998 int i = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath); 999 if (i == nodesInPath.size() && type != RunType.rbnf) { 1000 System.err.println("This nodes and last nodes has identical path. (" 1001 + item.getPath() + ") Some distinguishing attributes wrongly removed?"); 1002 return; 1003 } 1004 1005 // close previous nodes 1006 closeNodes(out, nodesForLastItem.size() - 2, i); 1007 1008 for (; i < nodesInPath.size() - 1; ++i) { 1009 startNonleafNode(out, nodesInPath.get(i), i); 1010 } 1011 1012 writeLeafNode(out, nodesInPath.get(i), item.getValue(), i); 1013 nodesForLastItem.clear(); 1014 nodesForLastItem.addAll(nodesInPath); 1015 } 1016 1017 /** 1018 * Close nodes that no longer appears in path. 1019 * 1020 * @param out 1021 * The JsonWriter to hold all output lines. 1022 * @param last 1023 * The last node index in previous item. 1024 * @param firstDiff 1025 * The first different node in next item. 1026 * @throws IOException 1027 */ closeNodes(JsonWriter out, int last, int firstDiff)1028 private void closeNodes(JsonWriter out, int last, int firstDiff) 1029 throws IOException { 1030 for (int i = last; i >= firstDiff; --i) { 1031 if (i == 0) { 1032 out.endObject(); 1033 break; 1034 } 1035 out.endObject(); 1036 } 1037 } 1038 1039 /** 1040 * Start a non-leaf node, write out its attributes. 1041 * 1042 * @param out 1043 * The ArrayList to hold all output lines. 1044 * @param node 1045 * The node being written. 1046 * @param level 1047 * indentation level. 1048 * @throws IOException 1049 */ startNonleafNode(JsonWriter out, CldrNode node, int level)1050 private void startNonleafNode(JsonWriter out, CldrNode node, int level) 1051 throws IOException { 1052 String objName = node.getNodeKeyName(); 1053 // Some node should be skipped as indicated by objName being null. 1054 if (objName == null) { 1055 return; 1056 } 1057 1058 // first level needs no key, it is the container. 1059 if (level == 0) { 1060 out.beginObject(); 1061 return; 1062 } 1063 1064 Map<String, String> attrAsValueMap = node.getAttrAsValueMap(); 1065 1066 if( type == RunType.annotations || type == RunType.annotationsDerived ) { 1067 if(objName.startsWith("U+")) { 1068 // parse U+22 -> " etc 1069 out.name(com.ibm.icu.text.UTF16.valueOf(Integer.parseInt(objName.substring(2), 16))); 1070 } else { 1071 out.name(objName); 1072 } 1073 } else { 1074 out.name(objName); 1075 } 1076 1077 out.beginObject(); 1078 for (String key : attrAsValueMap.keySet()) { 1079 String value = escapeValue(attrAsValueMap.get(key)); 1080 // attribute is prefixed with "_" when being used as key. 1081 out.name("_" + key).value(value); 1082 } 1083 } 1084 1085 /** 1086 * Write a CLDR item to file. 1087 * 1088 * "usesMetazone" will be checked to see if it is current. Those non-current 1089 * item will be dropped. 1090 * 1091 * @param out 1092 * The ArrayList to hold all output lines. 1093 * @param item 1094 * The CldrItem to be processed. 1095 * @param nodesForLastItem 1096 * Nodes in path for last item. 1097 * @param arrayLevel 1098 * The indentation level in which array exists. 1099 * @throws IOException 1100 * @throws ParseException 1101 */ outputArrayItem(JsonWriter out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel)1102 private void outputArrayItem(JsonWriter out, CldrItem item, 1103 ArrayList<CldrNode> nodesForLastItem, int arrayLevel) 1104 throws IOException, ParseException { 1105 1106 // This method is more complicated that outputCldrItem because it needs to 1107 // handle 3 different cases. 1108 // 1. When difference is found below array item, this item will be of the 1109 // same array item. Inside the array item, it is about the same as 1110 // outputCldrItem, just with one more level of indentation because of 1111 // the array. 1112 // 2. The array item is the leaf item with no attribute, simplify it as 1113 // an object with one name/value pair. 1114 // 3. The array item is the leaf item with attribute, an embedded object 1115 // will be created inside the array item object. 1116 1117 ArrayList<CldrNode> nodesInPath = item.getNodesInPath(); 1118 String value = escapeValue(item.getValue()); 1119 int nodesNum = nodesInPath.size(); 1120 1121 // case 1 1122 int diff = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath); 1123 if (diff > arrayLevel) { 1124 // close previous nodes 1125 closeNodes(out, nodesForLastItem.size() - 1, diff + 1); 1126 1127 for (int i = diff; i < nodesNum - 1; i++) { 1128 startNonleafNode(out, nodesInPath.get(i), i + 1); 1129 } 1130 writeLeafNode(out, nodesInPath.get(nodesNum - 1), value, nodesNum); 1131 return; 1132 } 1133 1134 if (arrayLevel == nodesNum - 1) { 1135 // case 2 1136 // close previous nodes 1137 if (nodesForLastItem.size() - 1 - arrayLevel > 0) { 1138 closeNodes(out, nodesForLastItem.size() - 1, arrayLevel); 1139 } 1140 1141 String objName = nodesInPath.get(nodesNum - 1).getNodeKeyName(); 1142 int pos = objName.indexOf('-'); 1143 if (pos > 0) { 1144 objName = objName.substring(0, pos); 1145 } 1146 1147 Map<String, String> attrAsValueMap = nodesInPath.get(nodesNum - 1).getAttrAsValueMap(); 1148 1149 // ADJUST RADIX BASED ON ICU RULE -- BEGIN 1150 if (attrAsValueMap.containsKey("radix")) { 1151 String radixValue = attrAsValueMap.get("radix"); 1152 attrAsValueMap.remove("radix"); 1153 for (Map.Entry<String, String> attributes : attrAsValueMap.entrySet()) { 1154 String oldKey = attributes.getKey(); 1155 String newValue = attributes.getValue(); 1156 String newKey = oldKey + "/" + radixValue; 1157 attrAsValueMap.remove(oldKey); 1158 attrAsValueMap.put(newKey, newValue); 1159 1160 } 1161 } 1162 // ADJUST RADIX BASED ON ICU RULE -- END 1163 1164 if (attrAsValueMap.isEmpty()) { 1165 out.beginObject(); 1166 out.name(objName).value(value); 1167 out.endObject(); 1168 } else { 1169 if (!objName.equals("rbnfrule")) { 1170 out.beginObject(); 1171 } 1172 writeLeafNode(out, objName, attrAsValueMap, value, nodesNum); 1173 if (!objName.equals("rbnfrule")) { 1174 out.endObject(); 1175 } 1176 1177 } 1178 // the last node is closed, remove it. 1179 nodesInPath.remove(nodesNum - 1); 1180 } else { 1181 // case 3 1182 // close previous nodes 1183 if (nodesForLastItem.size() - 1 - (arrayLevel) > 0) { 1184 closeNodes(out, nodesForLastItem.size() - 1, arrayLevel); 1185 } 1186 1187 out.beginObject(); 1188 1189 CldrNode node = nodesInPath.get(arrayLevel); 1190 String objName = node.getNodeKeyName(); 1191 int pos = objName.indexOf('-'); 1192 if (pos > 0) { 1193 objName = objName.substring(0, pos); 1194 } 1195 Map<String, String> attrAsValueMap = node.getAttrAsValueMap(); 1196 out.name(objName); 1197 out.beginObject(); 1198 for (String key : attrAsValueMap.keySet()) { 1199 // attribute is prefixed with "_" when being used as key. 1200 out.name("_" + key).value(escapeValue(attrAsValueMap.get(key))); 1201 } 1202 1203 for (int i = arrayLevel + 1; i < nodesInPath.size() - 1; i++) { 1204 startNonleafNode(out, nodesInPath.get(i), i + 1); 1205 } 1206 writeLeafNode(out, nodesInPath.get(nodesNum - 1), value, nodesNum); 1207 } 1208 1209 nodesForLastItem.clear(); 1210 nodesForLastItem.addAll(nodesInPath); 1211 } 1212 1213 /** 1214 * Compare two nodes list, find first index that the two list have different 1215 * nodes and return it. 1216 * 1217 * @param nodesForLastItem 1218 * Nodes from last item. 1219 * @param nodesInPath 1220 * Nodes for current item. 1221 * @return The index of first different node. 1222 */ findFirstDiffNodeIndex(ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrNode> nodesInPath)1223 private int findFirstDiffNodeIndex(ArrayList<CldrNode> nodesForLastItem, 1224 ArrayList<CldrNode> nodesInPath) { 1225 int i; 1226 for (i = 0; i < nodesInPath.size(); ++i) { 1227 if (i >= nodesForLastItem.size() || 1228 !nodesInPath.get(i).getNodeDistinguishingName().equals( 1229 nodesForLastItem.get(i).getNodeDistinguishingName())) { 1230 break; 1231 } 1232 } 1233 return i; 1234 } 1235 progressPrefix(AtomicInteger readCount, int totalCount)1236 private final String progressPrefix(AtomicInteger readCount, int totalCount) { 1237 return progressPrefix(readCount.get(), totalCount); 1238 } 1239 progressPrefix(int readCount, int totalCount)1240 private final String progressPrefix(int readCount, int totalCount) { 1241 return String.format("[%d/%d]:\t", readCount, totalCount); 1242 } 1243 1244 /** 1245 * Process files in a directory of CLDR file tree. 1246 * 1247 * @param dirName 1248 * The directory in which xml file will be transformed. 1249 * @param minimalDraftStatus 1250 * The minimumDraftStatus that will be accepted. 1251 * @throws IOException 1252 * @throws ParseException 1253 */ processDirectory(String dirName, DraftStatus minimalDraftStatus)1254 public void processDirectory(String dirName, DraftStatus minimalDraftStatus) 1255 throws IOException, ParseException { 1256 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(cldrCommonDir + "supplemental"); 1257 Factory cldrFactory = Factory.make( 1258 cldrCommonDir + dirName + "/", ".*"); 1259 Set<String> files = cldrFactory.getAvailable(); 1260 final int total = files.size(); 1261 AtomicInteger readCount = new AtomicInteger(0); 1262 Map<String, Throwable> errs = new TreeMap<>(); 1263 files 1264 .parallelStream() 1265 .unordered() 1266 .forEach(filename -> { 1267 1268 if (LdmlConvertRules.IGNORE_FILE_SET.contains(filename)) { 1269 return; 1270 } 1271 if (!filename.matches(match)) { 1272 return; 1273 } 1274 1275 String pathPrefix; 1276 CLDRFile file = cldrFactory.make(filename, resolve && type == RunType.main, minimalDraftStatus); 1277 // Print 'reading' after the make, to stagger the output a little bit. 1278 // Otherwise, the printout happens before any work happens, and is easily out of order. 1279 System.out.println("<"+progressPrefix(readCount.incrementAndGet(), total) + 1280 "Reading " + dirName + "/" + filename); 1281 1282 if (type == RunType.main) { 1283 pathPrefix = "/cldr/" + dirName + "/" + filename.replaceAll("_", "-") + "/"; 1284 } else { 1285 pathPrefix = "/cldr/" + dirName + "/"; 1286 } 1287 1288 try { 1289 convertCldrItems(readCount, total, dirName, filename, pathPrefix, 1290 mapPathsToSections(readCount, total, file, pathPrefix, sdi)); 1291 } catch(IOException | ParseException t) { 1292 t.printStackTrace(); 1293 System.err.println("!"+progressPrefix(readCount.incrementAndGet(), total)+filename + " - err - " + t); 1294 errs.put(filename, t); 1295 } finally { 1296 System.out.println("."+progressPrefix(readCount, total) + 1297 "Completing " + dirName + "/" + filename); 1298 } 1299 }); 1300 1301 if(!errs.isEmpty()) { 1302 System.err.println("Errors in these files:"); 1303 for(Map.Entry<String,Throwable> e : errs.entrySet()) { 1304 System.err.println(e.getKey() + " - " + e.getValue()); 1305 } 1306 // rethrow 1307 for(Map.Entry<String,Throwable> e : errs.entrySet()) { 1308 if(e.getValue() instanceof IOException ) { 1309 throw (IOException)e.getValue(); // throw the first one 1310 } else if(e.getValue() instanceof ParseException ) { 1311 throw (ParseException)e.getValue(); // throw the first one 1312 } else { 1313 throw new RuntimeException("Other exception thrown: " + e.getValue()); 1314 } 1315 /* NOTREACHED */ 1316 } 1317 } 1318 1319 if (writePackages) { 1320 for (String currentPackage : packages) { 1321 writePackagingFiles(outputDir, currentPackage); 1322 } 1323 if (type == RunType.main) { 1324 writeDefaultContent(outputDir); 1325 writeAvailableLocales(outputDir); 1326 } else if (type == RunType.supplemental) { 1327 writeScriptMetadata(outputDir); 1328 } 1329 1330 } 1331 } 1332 1333 /** 1334 * Replacement pattern for escaping. 1335 */ 1336 private static final Pattern escapePattern = PatternCache.get("\\\\(?!u)"); 1337 1338 /** 1339 * Escape \ and " in value string. 1340 * \ should be replaced by \\, except in case of \u1234 1341 * " should be replaced by \" 1342 * In following code, \\\\ represent one \, because java compiler and 1343 * regular expression compiler each do one round of escape. 1344 * 1345 * @param value 1346 * Input string. 1347 * @return escaped string. 1348 */ escapeValue(String value)1349 private String escapeValue(String value) { 1350 Matcher match = escapePattern.matcher(value); 1351 String ret = match.replaceAll("\\\\\\\\"); 1352 return ret.replace("\"", "\\\"").replace("\n", " ").replace("\t", " "); 1353 } 1354 1355 /** 1356 * Write the value to output. 1357 * 1358 * @param out 1359 * The ArrayList to hold all output lines. 1360 * @param node 1361 * The CldrNode being written. 1362 * @param value 1363 * The value part for this element. 1364 * @param level 1365 * Indent level. 1366 * @throws IOException 1367 */ writeLeafNode(JsonWriter out, CldrNode node, String value, int level)1368 private void writeLeafNode(JsonWriter out, CldrNode node, String value, 1369 int level) throws IOException { 1370 1371 String objName = node.getNodeKeyName(); 1372 Map<String, String> attrAsValueMaps = node.getAttrAsValueMap(); 1373 writeLeafNode(out, objName, attrAsValueMaps, value, level); 1374 } 1375 1376 /** 1377 * Write the value to output. 1378 * 1379 * @param out 1380 * The ArrayList to hold all output lines. 1381 * @param objName 1382 * The node's node. 1383 * @param attrAsValueMap 1384 * Those attributes that will be treated as values. 1385 * @param value 1386 * The value part for this element. 1387 * @param level 1388 * Indent level. 1389 * @throws IOException 1390 */ writeLeafNode(JsonWriter out, String objName, Map<String, String> attrAsValueMap, String value, int level)1391 private void writeLeafNode(JsonWriter out, String objName, 1392 Map<String, String> attrAsValueMap, String value, int level) 1393 throws IOException { 1394 if (objName == null) { 1395 return; 1396 } 1397 value = escapeValue(value); 1398 1399 if (attrAsValueMap.isEmpty()) { 1400 out.name(objName); 1401 if (value.isEmpty()) { 1402 out.beginObject(); 1403 out.endObject(); 1404 } else if (type == RunType.annotations || 1405 type == RunType.annotationsDerived) { 1406 out.beginArray(); 1407 // split this, so "a | b | c" becomes ["a","b","c"] 1408 for (final String s : Annotations.splitter.split(value.trim())) { 1409 out.value(s); 1410 } 1411 out.endArray(); 1412 } else { 1413 // normal value 1414 out.value(value); 1415 } 1416 return; 1417 } 1418 1419 // If there is no value, but a attribute being treated as value, 1420 // simplify the output. 1421 if (value.isEmpty() && 1422 attrAsValueMap.containsKey(LdmlConvertRules.ANONYMOUS_KEY)) { 1423 out.name(objName).value(attrAsValueMap.get(LdmlConvertRules.ANONYMOUS_KEY)); 1424 return; 1425 } 1426 if (!objName.equals("rbnfrule")) { 1427 out.name(objName); 1428 out.beginObject(); 1429 } 1430 1431 if (!value.isEmpty()) { 1432 out.name("_value").value(value); 1433 } 1434 1435 for (String key : attrAsValueMap.keySet()) { 1436 String attrValue = escapeValue(attrAsValueMap.get(key)); 1437 // attribute is prefixed with "_" when being used as key. 1438 if (LdmlConvertRules.ATTRVALUE_AS_ARRAY_SET.contains(key)) { 1439 String[] strings = attrValue.trim().split("\\s+"); 1440 if (type != RunType.rbnf) { 1441 out.name("_" + key); 1442 } else { 1443 out.name(key); 1444 } 1445 out.beginArray(); 1446 for (String s : strings) { 1447 out.value(s); 1448 } 1449 out.endArray(); 1450 } else if (type != RunType.rbnf) { 1451 out.name("_" + key).value(attrValue); 1452 } else { 1453 out.name(key).value(attrValue); 1454 } 1455 } 1456 if (!objName.equals("rbnfrule")) { 1457 out.endObject(); 1458 } 1459 } 1460 } 1461