1 package org.unicode.cldr.json; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.Lists; 5 import com.google.gson.Gson; 6 import com.google.gson.GsonBuilder; 7 import com.google.gson.JsonArray; 8 import com.google.gson.JsonElement; 9 import com.google.gson.JsonObject; 10 import com.google.gson.JsonPrimitive; 11 import com.ibm.icu.number.IntegerWidth; 12 import com.ibm.icu.number.LocalizedNumberFormatter; 13 import com.ibm.icu.number.NumberFormatter; 14 import com.ibm.icu.number.Precision; 15 import com.ibm.icu.text.MessageFormat; 16 import com.ibm.icu.util.NoUnit; 17 import com.ibm.icu.util.ULocale; 18 import java.io.BufferedReader; 19 import java.io.File; 20 import java.io.IOException; 21 import java.io.PrintWriter; 22 import java.text.ParseException; 23 import java.util.ArrayList; 24 import java.util.Arrays; 25 import java.util.Collections; 26 import java.util.HashSet; 27 import java.util.Iterator; 28 import java.util.LinkedList; 29 import java.util.List; 30 import java.util.Locale; 31 import java.util.Map; 32 import java.util.Map.Entry; 33 import java.util.Optional; 34 import java.util.Set; 35 import java.util.TreeMap; 36 import java.util.TreeSet; 37 import java.util.concurrent.atomic.AtomicInteger; 38 import java.util.logging.Logger; 39 import java.util.regex.Matcher; 40 import java.util.regex.Pattern; 41 import java.util.stream.Collectors; 42 import org.unicode.cldr.draft.FileUtilities; 43 import org.unicode.cldr.draft.ScriptMetadata; 44 import org.unicode.cldr.draft.ScriptMetadata.Info; 45 import org.unicode.cldr.tool.Option.Options; 46 import org.unicode.cldr.util.Annotations; 47 import org.unicode.cldr.util.CLDRConfig; 48 import org.unicode.cldr.util.CLDRFile; 49 import org.unicode.cldr.util.CLDRFile.DraftStatus; 50 import org.unicode.cldr.util.CLDRLocale; 51 import org.unicode.cldr.util.CLDRPaths; 52 import org.unicode.cldr.util.CLDRTool; 53 import org.unicode.cldr.util.CLDRTransforms; 54 import org.unicode.cldr.util.CLDRURLS; 55 import org.unicode.cldr.util.CalculatedCoverageLevels; 56 import org.unicode.cldr.util.CldrUtility; 57 import org.unicode.cldr.util.CoverageInfo; 58 import org.unicode.cldr.util.DtdData; 59 import org.unicode.cldr.util.DtdType; 60 import org.unicode.cldr.util.Factory; 61 import org.unicode.cldr.util.FileCopier; 62 import org.unicode.cldr.util.GlossonymConstructor; 63 import org.unicode.cldr.util.Level; 64 import org.unicode.cldr.util.LocaleIDParser; 65 import org.unicode.cldr.util.Pair; 66 import org.unicode.cldr.util.PatternCache; 67 import org.unicode.cldr.util.StandardCodes; 68 import org.unicode.cldr.util.SupplementalDataInfo; 69 import org.unicode.cldr.util.Timer; 70 import org.unicode.cldr.util.XMLSource; 71 import org.unicode.cldr.util.XPathParts; 72 73 /** 74 * Utility methods to extract data from CLDR repository and export it in JSON format. 75 * 76 * @author shanjian / emmons 77 */ 78 @CLDRTool(alias = "ldml2json", description = "Convert CLDR data to JSON") 79 public class Ldml2JsonConverter { 80 // Icons 81 private static final String DONE_ICON = "✅"; 82 private static final String GEAR_ICON = "⚙️"; 83 private static final String NONE_ICON = "∅"; 84 private static final String PACKAGE_ICON = ""; 85 private static final String SECTION_ICON = ""; 86 private static final String TYPE_ICON = ""; 87 private static final String WARN_ICON = "⚠️"; 88 89 // File prefix 90 private static final String CLDR_PKG_PREFIX = "cldr-"; 91 private static final String FULL_TIER_SUFFIX = "-full"; 92 private static final String MODERN_TIER_SUFFIX = "-modern"; 93 private static final String TRANSFORM_RAW_SUFFIX = ".txt"; 94 private static Logger logger = Logger.getLogger(Ldml2JsonConverter.class.getName()); 95 96 enum RunType { 97 all, // number zero 98 main, 99 supplemental(false, false), // aka 'cldr-core' 100 segments, 101 rbnf(false, true), 102 annotations, 103 annotationsDerived, 104 bcp47(false, false), 105 transforms(false, false); 106 107 private final boolean isTiered; 108 private final boolean hasLocales; 109 RunType()110 RunType() { 111 this.isTiered = true; 112 this.hasLocales = true; 113 } 114 RunType(boolean isTiered, boolean hasLocales)115 RunType(boolean isTiered, boolean hasLocales) { 116 this.isTiered = isTiered; 117 this.hasLocales = hasLocales; 118 } 119 /** 120 * Is it split into modern/full? 121 * 122 * @return 123 */ tiered()124 public boolean tiered() { 125 return isTiered; 126 } 127 /** 128 * Does it have locale IDs? 129 * 130 * @return 131 */ locales()132 public boolean locales() { 133 return hasLocales; 134 } 135 /** 136 * return the options as a pipe-delimited list 137 * 138 * @return 139 */ valueList()140 public static String valueList() { 141 return String.join( 142 "|", 143 Lists.newArrayList(RunType.values()).stream() 144 .map(t -> t.name()) 145 .toArray(String[]::new)); 146 } 147 } 148 149 private static final StandardCodes sc = StandardCodes.make(); 150 private Set<String> defaultContentLocales = 151 SupplementalDataInfo.getInstance().getDefaultContentLocales(); 152 private Set<String> skippedDefaultContentLocales = new TreeSet<>(); 153 154 private class AvailableLocales { 155 Set<String> modern = new TreeSet<>(); 156 Set<String> full = new TreeSet<>(); 157 } 158 159 private AvailableLocales avl = new AvailableLocales(); 160 private Gson gson = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create(); 161 private static final Options options = 162 new Options( 163 "Usage: LDML2JsonConverter [OPTIONS] [FILES]\n" 164 + "This program converts CLDR data to the JSON format.\n" 165 + "Please refer to the following options. \n" 166 + "\texample: org.unicode.cldr.json.Ldml2JsonConverter -c xxx -d yyy") 167 .add( 168 "bcp47", 169 'B', 170 "(true|false)", 171 "true", 172 "Whether to strictly use BCP47 tags in filenames and data. Defaults to true.") 173 .add( 174 "bcp47-no-subtags", 175 'T', 176 "(true|false)", 177 "true", 178 "In BCP47 mode, ignore locales with subtags such as en-US-u-va-posix. Defaults to true.") 179 .add( 180 "commondir", 181 'c', 182 ".*", 183 CLDRPaths.COMMON_DIRECTORY, 184 "Common directory for CLDR files, defaults to CldrUtility.COMMON_DIRECTORY") 185 .add( 186 "destdir", 187 'd', 188 ".*", 189 CLDRPaths.GEN_DIRECTORY, 190 "Destination directory for output files, defaults to CldrUtility.GEN_DIRECTORY") 191 .add( 192 "match", 193 'm', 194 ".*", 195 ".*", 196 "Regular expression to define only specific locales or files to be generated") 197 .add( 198 "type", 199 't', 200 "(" + RunType.valueList() + ")", 201 "all", 202 "Type of CLDR data being generated, such as main, supplemental, or segments. All gets all.") 203 .add( 204 "resolved", 205 'r', 206 "(true|false)", 207 "false", 208 "Whether the output JSON for the main directory should be based on resolved or unresolved data") 209 .add( 210 "Redundant", 211 'R', 212 "(true|false)", 213 "false", 214 "Include redundant data from code-fallback and constructed") 215 .add( 216 "draftstatus", 217 's', 218 "(approved|contributed|provisional|unconfirmed)", 219 "unconfirmed", 220 "The minimum draft status of the output data") 221 .add( 222 "coverage", 223 'l', 224 "(minimal|basic|moderate|modern|comprehensive|optional)", 225 "optional", 226 "The maximum coverage level of the output data") 227 .add( 228 "packagelist", 229 'P', 230 "(true|false)", 231 "true", 232 "Whether to output PACKAGES.md and cldr-core/cldr-packages.json (during supplemental/cldr-core)") 233 .add( 234 "fullnumbers", 235 'n', 236 "(true|false)", 237 "false", 238 "Whether the output JSON should output data for all numbering systems, even those not used in the locale") 239 .add( 240 "other", 241 'o', 242 "(true|false)", 243 "false", 244 "Whether to write out the 'other' section, which contains any unmatched paths") 245 .add( 246 "packages", 247 'p', 248 "(true|false)", 249 "false", 250 "Whether to group data files into installable packages") 251 .add( 252 "identity", 253 'i', 254 "(true|false)", 255 "true", 256 "Whether to copy the identity info into all sections containing data") 257 .add("konfig", 'k', ".*", null, "LDML to JSON configuration file") 258 .add( 259 "pkgversion", 260 'V', 261 ".*", 262 getDefaultVersion(), 263 "Version to be used in writing package files") 264 .add( 265 "Modern", 266 'M', 267 "(true|false)", 268 "false", 269 "Whether to include the -modern tier") 270 // Primarily useful for non-Maven build systems where CldrUtility.LICENSE may 271 // not be available as it is put in place by pom.xml 272 .add( 273 "license-file", 274 'L', 275 ".*", 276 "", 277 "Override the license file included in the bundle"); 278 main(String[] args)279 public static void main(String[] args) throws Exception { 280 System.out.println(GEAR_ICON + " " + Ldml2JsonConverter.class.getName() + " options:"); 281 options.parse(args, true); 282 283 Timer overallTimer = new Timer(); 284 overallTimer.start(); 285 final String rawType = options.get("type").getValue(); 286 287 if (RunType.all.name().equals(rawType)) { 288 // Running all types 289 for (final RunType t : RunType.values()) { 290 if (t == RunType.all) continue; 291 System.out.println(); 292 System.out.println( 293 TYPE_ICON + "####################### " + t + " #######################"); 294 Timer subTimer = new Timer(); 295 subTimer.start(); 296 processType(t.name()); 297 System.out.println( 298 TYPE_ICON + " " + t + "\tFinished in " + subTimer.toMeasureString()); 299 System.out.println(); 300 } 301 } else { 302 processType(rawType); 303 } 304 305 System.out.println( 306 "\n\n###\n\n" 307 + DONE_ICON 308 + " Finished everything in " 309 + overallTimer.toMeasureString()); 310 } 311 processType(final String runType)312 static void processType(final String runType) throws Exception { 313 Ldml2JsonConverter l2jc = 314 new Ldml2JsonConverter( 315 options.get("commondir").getValue(), 316 options.get("destdir").getValue(), 317 runType, 318 Boolean.parseBoolean(options.get("fullnumbers").getValue()), 319 Boolean.parseBoolean(options.get("resolved").getValue()), 320 options.get("coverage").getValue(), 321 options.get("match").getValue(), 322 Boolean.parseBoolean(options.get("packages").getValue()), 323 options.get("konfig").getValue(), 324 options.get("pkgversion").getValue(), 325 Boolean.parseBoolean(options.get("bcp47").getValue()), 326 Boolean.parseBoolean(options.get("bcp47-no-subtags").getValue()), 327 Boolean.parseBoolean(options.get("Modern").getValue()), 328 Boolean.parseBoolean(options.get("Redundant").getValue()), 329 Optional.ofNullable(options.get("license-file").getValue()) 330 .filter(s -> !s.isEmpty())); 331 332 DraftStatus status = DraftStatus.valueOf(options.get("draftstatus").getValue()); 333 l2jc.processDirectory(runType, status); 334 } 335 336 // The CLDR file directory where those official XML files will be found. 337 private String cldrCommonDir; 338 // Where the generated JSON files will be stored. 339 private String outputDir; 340 // Whether data in main should output all numbering systems, even those not in use in the 341 // locale. 342 private boolean fullNumbers; 343 // Whether data in main should be resolved for output. 344 private boolean resolve; 345 // Used to match specific locales for output 346 private String match; 347 // Used to filter based on coverage 348 private int coverageValue; 349 // Whether we should write output files into installable packages 350 private boolean writePackages; 351 // Type of run for this converter: main, supplemental, or segments 352 private final RunType type; 353 // include Redundant data such as apc="apc", en_US="en (US)" 354 private boolean includeRedundant; 355 356 static class JSONSection implements Comparable<JSONSection> { 357 public String section; 358 public Pattern pattern; 359 public String packageName; 360 361 @Override compareTo(JSONSection other)362 public int compareTo(JSONSection other) { 363 return section.compareTo(other.section); 364 } 365 } 366 367 private Map<String, String> dependencies; 368 private List<JSONSection> sections; 369 private Set<String> packages; 370 private final String pkgVersion; 371 private final boolean strictBcp47; 372 private final boolean writeModernPackage; 373 private final Optional<String> licenseFile; 374 private final boolean skipBcp47LocalesWithSubtags; 375 private LdmlConfigFileReader configFileReader; 376 Ldml2JsonConverter( String cldrDir, String outputDir, String runType, boolean fullNumbers, boolean resolve, String coverage, String match, boolean writePackages, String configFile, String pkgVersion, boolean strictBcp47, boolean skipBcp47LocalesWithSubtags, boolean writeModernPackage, boolean includeRedundant, Optional<String> licenseFile)377 public Ldml2JsonConverter( 378 String cldrDir, 379 String outputDir, 380 String runType, 381 boolean fullNumbers, 382 boolean resolve, 383 String coverage, 384 String match, 385 boolean writePackages, 386 String configFile, 387 String pkgVersion, 388 boolean strictBcp47, 389 boolean skipBcp47LocalesWithSubtags, 390 boolean writeModernPackage, 391 boolean includeRedundant, 392 Optional<String> licenseFile) { 393 this.writeModernPackage = writeModernPackage; 394 this.strictBcp47 = strictBcp47; 395 this.skipBcp47LocalesWithSubtags = strictBcp47 && skipBcp47LocalesWithSubtags; 396 this.cldrCommonDir = cldrDir; 397 this.outputDir = outputDir; 398 try { 399 this.type = RunType.valueOf(runType); 400 } catch (IllegalArgumentException | NullPointerException e) { 401 throw new RuntimeException( 402 "runType (-t) invalid: " + runType + " must be one of " + RunType.valueList(), 403 e); 404 } 405 this.fullNumbers = fullNumbers; 406 this.resolve = resolve; 407 this.match = match; 408 this.writePackages = writePackages; 409 this.coverageValue = Level.get(coverage).getLevel(); 410 this.pkgVersion = pkgVersion; 411 412 LdmlConvertRules.addVersionHandler(pkgVersion.split("\\.")[0]); 413 414 configFileReader = new LdmlConfigFileReader(); 415 configFileReader.read(configFile, type); 416 this.dependencies = configFileReader.getDependencies(); 417 this.sections = configFileReader.getSections(); 418 this.packages = new TreeSet<>(); 419 this.includeRedundant = includeRedundant; 420 this.licenseFile = licenseFile; 421 } 422 423 /** 424 * @see XPathParts#addInternal 425 */ 426 static final Pattern ANNOTATION_CP_REMAP = 427 PatternCache.get("^(.*)\\[@cp=\"(\\[|\\]|'|\"|@|/|=)\"\\](.*)$"); 428 429 /** 430 * Transform the path by applying PATH_TRANSFORMATIONS rules. 431 * 432 * @param pathStr The path string being transformed. 433 * @return The transformed path. 434 */ transformPath(final String pathStr, final String pathPrefix)435 private String transformPath(final String pathStr, final String pathPrefix) { 436 String result = pathStr; 437 438 // handle annotation cp value 439 Matcher cpm = ANNOTATION_CP_REMAP.matcher(result); 440 if (cpm.matches()) { 441 // We need to avoid breaking the syntax not just of JSON, but of XPATH. 442 final String badCodepointRange = cpm.group(2); 443 StringBuilder sb = new StringBuilder(cpm.group(1)).append("[@cp=\""); 444 // JSON would handle a wide range of things if escaped, but XPATH will not. 445 if (badCodepointRange.codePointCount(0, badCodepointRange.length()) != 1) { 446 // forbid more than one U+ (because we will have to unescape it.) 447 throw new IllegalArgumentException( 448 "Need exactly one codepoint in the @cp string, but got " 449 + badCodepointRange 450 + " in xpath " 451 + pathStr); 452 } 453 badCodepointRange 454 .codePoints() 455 .forEach(cp -> sb.append("U+").append(Integer.toHexString(cp).toUpperCase())); 456 sb.append("\"]").append(cpm.group(3)); 457 result = sb.toString(); 458 } 459 460 logger.finest(" IN pathStr : " + result); 461 result = LdmlConvertRules.PathTransformSpec.applyAll(result); 462 result = result.replaceFirst("/ldml/", pathPrefix); 463 result = result.replaceFirst("/supplementalData/", pathPrefix); 464 465 if (result.startsWith("//cldr/supplemental/references/reference")) { 466 // no change 467 } else if (strictBcp47) { 468 // Look for something like <!--@MATCH:set/validity/locale--> in DTD 469 if (result.contains("localeDisplayNames/languages/language")) { 470 if (result.contains("type=\"root\"")) { 471 // This is strictBcp47 472 // Drop translation for 'root' as it conflicts with 'und' 473 return ""; // 'drop this path' 474 } 475 result = fixXpathBcp47(result, "language", "type"); 476 } else if (result.contains("likelySubtags/likelySubtag")) { 477 if (!result.contains("\"iw\"") 478 && !result.contains("\"in\"") 479 && !result.contains("\"ji\"")) { 480 // Special case: preserve 'iw' and 'in' likely subtags 481 result = fixXpathBcp47(result, "likelySubtag", "from", "to"); 482 } else { 483 result = underscoreToHypen(result); 484 logger.warning("Including aliased likelySubtags: " + result); 485 } 486 } else if (result.startsWith("//cldr/supplemental/weekData/weekOfPreference")) { 487 result = fixXpathBcp47(result, "weekOfPreference", "locales"); 488 } else if (result.startsWith("//cldr/supplemental/metadata/defaultContent")) { 489 result = fixXpathBcp47(result, "defaultContent", "locales"); 490 } else if (result.startsWith("//cldr/supplemental/grammatical") 491 && result.contains("Data/grammaticalFeatures")) { 492 result = fixXpathBcp47(result, "grammaticalFeatures", "locales"); 493 } else if (result.startsWith("//cldr/supplemental/grammatical") 494 && result.contains("Data/grammaticalDerivations")) { 495 result = fixXpathBcp47(result, "grammaticalDerivations", "locales"); 496 } else if (result.startsWith("//cldr/supplemental/dayPeriodRuleSet")) { 497 result = fixXpathBcp47(result, "dayPeriodRules", "locales"); 498 } else if (result.startsWith("//cldr/supplemental/plurals")) { 499 result = fixXpathBcp47(result, "pluralRules", "locales"); 500 } else if (result.startsWith("//cldr/supplemental/timeData/hours")) { 501 result = fixXpathBcp47MishMash(result, "hours", "regions"); 502 } else if (result.startsWith("//cldr/supplemental/parentLocales/parentLocale")) { 503 result = fixXpathBcp47(result, "parentLocale", "parent", "locales"); 504 } else if (result.startsWith( 505 "//cldr/supplemental/territoryInfo/territory/languagePopulation")) { 506 result = fixXpathBcp47(result, "languagePopulation", "type"); 507 } else if (result.contains("languages") 508 || result.contains("languageAlias") 509 || result.contains("languageMatches") 510 || result.contains("likelySubtags") 511 || result.contains("parentLocale") 512 || result.contains("locales=")) { 513 final String oldResult = result; 514 result = underscoreToHypen(result); 515 if (!oldResult.equals(result)) { 516 logger.fine(oldResult + " => " + result); 517 } 518 } 519 } else if (result.contains("languages") 520 || result.contains("languageAlias") 521 || result.contains("languageMatches") 522 || result.contains("likelySubtags") 523 || result.contains("parentLocale") 524 || result.contains("locales=")) { 525 // old behavior: just munge paths.. 526 result = underscoreToHypen(result); 527 } 528 logger.finest("OUT pathStr : " + result); 529 logger.finest("result: " + result); 530 return result; 531 } 532 533 /** Read all paths in the file, and assign each to a JSONSection. Return the map. */ mapPathsToSections( AtomicInteger readCount, int totalCount, CLDRFile file, String pathPrefix, SupplementalDataInfo sdi)534 private Map<JSONSection, List<CldrItem>> mapPathsToSections( 535 AtomicInteger readCount, 536 int totalCount, 537 CLDRFile file, 538 String pathPrefix, 539 SupplementalDataInfo sdi) 540 throws IOException, ParseException { 541 final Map<JSONSection, List<CldrItem>> sectionItems = new TreeMap<>(); 542 543 String locID = file.getLocaleID(); 544 Matcher noNumberingSystemMatcher = LdmlConvertRules.NO_NUMBERING_SYSTEM_PATTERN.matcher(""); 545 Matcher numberingSystemMatcher = LdmlConvertRules.NUMBERING_SYSTEM_PATTERN.matcher(""); 546 Matcher rootIdentityMatcher = LdmlConvertRules.ROOT_IDENTITY_PATTERN.matcher(""); 547 Matcher versionMatcher = LdmlConvertRules.VERSION_PATTERN.matcher(""); 548 Set<String> activeNumberingSystems = new TreeSet<>(); 549 activeNumberingSystems.add("latn"); // Always include latin script numbers 550 for (String np : LdmlConvertRules.ACTIVE_NUMBERING_SYSTEM_XPATHS) { 551 String ns = file.getWinningValue(np); 552 if (ns != null && ns.length() > 0) { 553 activeNumberingSystems.add(ns); 554 } 555 } 556 final DtdType fileDtdType = file.getDtdType(); 557 CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo(); 558 // read paths in DTD order. The order is critical for JSON processing. 559 final CLDRFile.Status status = new CLDRFile.Status(); 560 for (Iterator<String> it = 561 file.iterator("", DtdData.getInstance(fileDtdType).getDtdComparator(null)); 562 it.hasNext(); ) { 563 int cv = Level.UNDETERMINED.getLevel(); 564 final String path = it.next(); 565 566 // Check for code-fallback and constructed first, even before fullpath and value 567 final String localeWhereFound = file.getSourceLocaleID(path, status); 568 if (!includeRedundant 569 && (localeWhereFound.equals(XMLSource.CODE_FALLBACK_ID) 570 || // language[@type="apc"] = apc : missing 571 status.pathWhereFound.equals( 572 GlossonymConstructor 573 .PSEUDO_PATH))) { // language[@type="fa_AF"] = fa (AF) 574 // or Farsi (Afghanistan) : missing 575 // Don't include these paths. 576 continue; 577 } 578 579 // now get the fullpath and value 580 String fullPath = file.getFullXPath(path); 581 String value = file.getWinningValue(path); 582 583 if (fullPath == null) { 584 fullPath = path; 585 } 586 587 if (!CLDRFile.isSupplementalName(locID) 588 && path.startsWith("//ldml/") 589 && !path.contains("/identity")) { 590 cv = covInfo.getCoverageValue(path, locID); 591 } 592 if (cv > coverageValue) { 593 continue; 594 } 595 // Discard root identity element unless the locale is root 596 // TODO: CLDR-17790 this code should not be needed. 597 rootIdentityMatcher.reset(fullPath); 598 if (rootIdentityMatcher.matches() && !"root".equals(locID)) { 599 continue; 600 } 601 602 // discard version stuff 603 versionMatcher.reset(fullPath); 604 if (versionMatcher.matches()) { 605 // drop //ldml/identity/version entirely. 606 continue; 607 } 608 609 // automatically filter out number symbols and formats without a numbering system 610 noNumberingSystemMatcher.reset(fullPath); 611 if (noNumberingSystemMatcher.matches()) { 612 continue; 613 } 614 615 // Filter out non-active numbering systems data unless fullNumbers is specified. 616 numberingSystemMatcher.reset(fullPath); 617 if (numberingSystemMatcher.matches() && !fullNumbers) { 618 XPathParts xpp = XPathParts.getFrozenInstance(fullPath); 619 String currentNS = xpp.getAttributeValue(2, "numberSystem"); 620 if (currentNS != null && !activeNumberingSystems.contains(currentNS)) { 621 continue; 622 } 623 } 624 625 // Handle the no inheritance marker. 626 if (resolve && CldrUtility.NO_INHERITANCE_MARKER.equals(value)) { 627 continue; 628 } 629 630 // discard draft before transforming 631 final String pathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(path).replaceAll(""); 632 final String fullPathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(fullPath).replaceAll(""); 633 634 final String pathNoXmlSpace = 635 CLDRFile.XML_SPACE_PATTERN.matcher(pathNoDraft).replaceAll(""); 636 final String fullPathNoXmlSpace = 637 CLDRFile.XML_SPACE_PATTERN.matcher(fullPathNoDraft).replaceAll(""); 638 639 final String transformedPath = transformPath(pathNoXmlSpace, pathPrefix); 640 final String transformedFullPath = transformPath(fullPathNoXmlSpace, pathPrefix); 641 642 if (transformedPath.isEmpty()) { 643 continue; // skip this path 644 } 645 646 for (JSONSection js : 647 sections) { // TODO: move to subfunction, error if >1 section matches 648 if (js.pattern.matcher(transformedPath).matches()) { 649 CldrItem item = 650 new CldrItem( 651 transformedPath, transformedFullPath, path, fullPath, value); 652 653 List<CldrItem> cldrItems = sectionItems.get(js); 654 if (cldrItems == null) { 655 cldrItems = new ArrayList<>(); 656 } 657 cldrItems.add(item); 658 sectionItems.put(js, cldrItems); 659 break; 660 } 661 } 662 } 663 664 // TODO: move matcher out of inner loop 665 final Matcher versionInfoMatcher = VERSION_INFO_PATTERN.matcher(""); 666 // Automatically copy the version info to any sections that had real data in them. 667 JSONSection otherSection = sections.get(sections.size() - 1); 668 List<CldrItem> others = sectionItems.get(otherSection); 669 if (others == null) { 670 return sectionItems; 671 } 672 List<CldrItem> otherSectionItems = new ArrayList<>(others); 673 int addedItemCount = 0; 674 boolean copyIdentityInfo = Boolean.parseBoolean(options.get("identity").getValue()); 675 676 for (CldrItem item : otherSectionItems) { 677 String thisPath = item.getPath(); 678 versionInfoMatcher.reset(thisPath); 679 if (versionInfoMatcher.matches()) { 680 for (JSONSection js : sections) { 681 if (sectionItems.get(js) != null 682 && !js.section.equals("other") 683 && copyIdentityInfo) { 684 List<CldrItem> hit = sectionItems.get(js); 685 hit.add(addedItemCount, item); 686 sectionItems.put(js, hit); 687 } 688 if (js.section.equals("other")) { // did not match one of the regular sections 689 List<CldrItem> hit = sectionItems.get(js); 690 hit.remove(item); 691 sectionItems.put(js, hit); 692 } 693 } 694 addedItemCount++; 695 } 696 } 697 return sectionItems; 698 } 699 700 static final Pattern VERSION_INFO_PATTERN = PatternCache.get(".*/(identity|version).*"); 701 static final Pattern HAS_SUBTAG = PatternCache.get(".*-[a-z]-.*"); 702 703 /** 704 * Convert CLDR's XML data to JSON format. 705 * 706 * @param file CLDRFile object. 707 * @param outFilename The file name used to save JSON data. 708 * @throws IOException 709 * @throws ParseException 710 * @return total items written in all files. (if 0, file had no effect) 711 */ convertCldrItems( AtomicInteger readCount, int totalCount, String dirName, String filename, String pathPrefix, final Map<JSONSection, List<CldrItem>> sectionItems)712 private int convertCldrItems( 713 AtomicInteger readCount, 714 int totalCount, 715 String dirName, 716 String filename, 717 String pathPrefix, 718 final Map<JSONSection, List<CldrItem>> sectionItems) 719 throws IOException, ParseException { 720 // zone and timezone items are queued for sorting first before they are 721 // processed. 722 723 final String filenameAsLangTag = unicodeLocaleToString(filename); 724 725 if (skipBcp47LocalesWithSubtags 726 && type.locales() 727 && HAS_SUBTAG.matcher(filenameAsLangTag).matches()) { 728 // Has a subtag, so skip it. 729 // It will show up in the "no output" list. 730 return 0; 731 } 732 733 int totalItemsInFile = 0; 734 735 List<Pair<String, Integer>> outputProgress = new LinkedList<>(); 736 737 for (JSONSection js : sections) { 738 if (js.section.equals("IGNORE")) { 739 continue; 740 } 741 String outFilename; 742 if (type == RunType.rbnf) { 743 outFilename = filenameAsLangTag + ".json"; 744 } else if (type == RunType.bcp47) { 745 outFilename = filename + ".json"; 746 } else if (type == RunType.transforms) { 747 outFilename = filename + ".json"; 748 } else if (js.section.equals("other")) { 749 // If you see other-___.json, it means items that were missing from 750 // JSON_config_*.txt 751 outFilename = js.section + "-" + filename + ".json"; // Use original filename 752 } else { 753 outFilename = js.section + ".json"; 754 } 755 String tier = ""; 756 boolean writeOther = Boolean.parseBoolean(options.get("other").getValue()); 757 if (js.section.equals("other") && !writeOther) { 758 continue; 759 } else { 760 StringBuilder outputDirname = new StringBuilder(outputDir); 761 if (writePackages) { 762 if (type.tiered()) { 763 LocaleIDParser lp = new LocaleIDParser(); 764 lp.set(filename); 765 if (defaultContentLocales.contains(filename) 766 && lp.getRegion().length() > 0) { 767 if (type == RunType.main) { 768 skippedDefaultContentLocales.add(filenameAsLangTag); 769 } 770 continue; 771 } 772 final boolean isModernTier = localeIsModernTier(filename); 773 if (isModernTier && writeModernPackage) { 774 tier = MODERN_TIER_SUFFIX; 775 if (type == RunType.main) { 776 avl.modern.add(filenameAsLangTag); 777 } 778 } else { 779 tier = FULL_TIER_SUFFIX; 780 } 781 if (type == RunType.main) { 782 avl.full.add(filenameAsLangTag); 783 } 784 } else if (type == RunType.rbnf 785 || type == RunType.bcp47 786 || type == RunType.transforms) { 787 // untiered, just use the name 788 js.packageName = type.name(); 789 tier = ""; 790 } 791 if (js.packageName != null) { 792 String packageName = CLDR_PKG_PREFIX + js.packageName + tier; 793 outputDirname.append("/" + packageName); 794 packages.add(packageName); 795 } 796 outputDirname.append("/" + dirName + "/"); 797 if (type.tiered()) { 798 outputDirname.append(filenameAsLangTag); 799 } 800 logger.fine("outDir: " + outputDirname); 801 logger.fine("pack: " + js.packageName); 802 logger.fine("dir: " + dirName); 803 } else { 804 outputDirname.append("/" + filename); 805 } 806 807 assert (tier.isEmpty() == !type.tiered()); 808 809 List<String> outputDirs = new ArrayList<>(); 810 outputDirs.add(outputDirname.toString()); 811 if (writePackages && tier.equals(MODERN_TIER_SUFFIX) && js.packageName != null) { 812 // if it is in 'modern', add it to 'full' and core also. 813 outputDirs.add( 814 outputDirname 815 .toString() 816 .replaceFirst(MODERN_TIER_SUFFIX, FULL_TIER_SUFFIX)); 817 // Also need to make sure that the full and core package is added 818 packages.add(CLDR_PKG_PREFIX + js.packageName + FULL_TIER_SUFFIX); 819 } 820 821 for (String outputDir : outputDirs) { 822 List<CldrItem> theItems = sectionItems.get(js); 823 if (theItems == null || theItems.size() == 0) { 824 logger.fine( 825 () -> 826 ">" 827 + progressPrefix(readCount, totalCount) 828 + outputDir 829 + " - no items to write in " 830 + js.section); // mostly noise 831 continue; 832 } 833 logger.fine( 834 () -> 835 ("?" 836 + progressPrefix( 837 readCount, totalCount, filename, js.section) 838 + " - " 839 + theItems.size() 840 + " item(s)" 841 + "\r")); 842 // Create the output dir if it doesn't exist 843 File dir = new File(outputDir.toString()); 844 if (!dir.exists()) { 845 dir.mkdirs(); 846 } 847 JsonObject out = new JsonObject(); // root object for writing 848 849 ArrayList<CldrItem> sortingItems = new ArrayList<>(); 850 ArrayList<CldrItem> arrayItems = new ArrayList<>(); 851 852 ArrayList<CldrNode> nodesForLastItem = new ArrayList<>(); 853 String lastLeadingArrayItemPath = null; 854 String leadingArrayItemPath = ""; 855 int valueCount = 0; 856 String previousIdentityPath = null; 857 for (CldrItem item : theItems) { 858 if (item.getPath().isEmpty()) { 859 throw new IllegalArgumentException( 860 "empty xpath in " 861 + filename 862 + " section " 863 + js.packageName 864 + "/" 865 + js.section); 866 } 867 if (type == RunType.rbnf) { 868 item.adjustRbnfPath(); 869 } 870 871 // items in the identity section of a file should only ever contain the 872 // lowest level, even if using 873 // resolving source, so if we have duplicates ( caused by attributes used as 874 // a value ) then suppress 875 // them here. 876 if (item.getPath().contains("/identity/")) { 877 String[] parts = item.getPath().split("\\["); 878 if (parts[0].equals(previousIdentityPath)) { 879 continue; 880 } else { 881 XPathParts xpp = XPathParts.getFrozenInstance(item.getPath()); 882 String territory = xpp.findAttributeValue("territory", "type"); 883 LocaleIDParser lp = new LocaleIDParser().set(filename); 884 if (territory != null 885 && territory.length() > 0 886 && !territory.equals(lp.getRegion())) { 887 continue; 888 } 889 previousIdentityPath = parts[0]; 890 } 891 } 892 893 if (item.getUntransformedPath() 894 .startsWith("//supplementalData/transforms")) { 895 // here, write the raw data 896 final String rawTransformFile = filename + TRANSFORM_RAW_SUFFIX; 897 try (PrintWriter outf = 898 FileUtilities.openUTF8Writer(outputDir, rawTransformFile)) { 899 outf.println(item.getValue().trim()); 900 // note: not logging the write here- it will be logged when the 901 // .json file is written. 902 } 903 final String path = item.getPath(); 904 item.setPath(fixTransformPath(path)); 905 final String fullPath = item.getFullPath(); 906 item.setFullPath(fixTransformPath(fullPath)); 907 // the value is now the raw filename 908 item.setValue(rawTransformFile); 909 } 910 911 // some items need to be split to multiple item before processing. None 912 // of those items need to be sorted. 913 // Applies to SPLITTABLE_ATTRS attributes. 914 CldrItem[] items = item.split(); 915 if (items == null) { 916 // Nothing to split. Make it a 1-element array. 917 items = new CldrItem[1]; 918 items[0] = item; 919 } 920 valueCount += items.length; 921 922 // Hard code this part. 923 if (item.getUntransformedPath().contains("unitPreference")) { 924 // Need to do more transforms on this one, so just output version/etc 925 // here. 926 continue; 927 } 928 929 for (CldrItem newItem : items) { 930 // alias will be dropped in conversion, don't count it. 931 if (newItem.isAliasItem()) { 932 valueCount--; 933 } 934 935 // Items like zone items need to be sorted first before write them out. 936 if (newItem.needsSort()) { 937 resolveArrayItems(out, nodesForLastItem, arrayItems); 938 sortingItems.add(newItem); 939 } else { 940 Matcher matcher = 941 LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher( 942 newItem.getPath()); 943 if (matcher.matches()) { 944 resolveSortingItems(out, nodesForLastItem, sortingItems); 945 leadingArrayItemPath = matcher.group(1); 946 if (lastLeadingArrayItemPath != null 947 && !lastLeadingArrayItemPath.equals( 948 leadingArrayItemPath)) { 949 resolveArrayItems(out, nodesForLastItem, arrayItems); 950 } 951 lastLeadingArrayItemPath = leadingArrayItemPath; 952 arrayItems.add(newItem); 953 } else { 954 // output a single item 955 resolveSortingItems(out, nodesForLastItem, sortingItems); 956 resolveArrayItems(out, nodesForLastItem, arrayItems); 957 outputCldrItem(out, nodesForLastItem, newItem); 958 lastLeadingArrayItemPath = ""; 959 } 960 } 961 } 962 } 963 964 resolveSortingItems(out, nodesForLastItem, sortingItems); 965 resolveArrayItems(out, nodesForLastItem, arrayItems); 966 if (js.section.contains("unitPreferenceData")) { 967 outputUnitPreferenceData(js, theItems, out, nodesForLastItem); 968 } 969 970 // Special processing for transforms. 971 if (type == RunType.transforms) { 972 final JsonObject jo = out.getAsJsonObject("transforms"); 973 if (jo == null || jo.isEmpty()) { 974 throw new RuntimeException( 975 "Could not get transforms object in " + filename); 976 } 977 @SuppressWarnings("unchecked") 978 final Entry<String, JsonElement>[] s = jo.entrySet().toArray(new Entry[0]); 979 if (s == null || s.length != 1) { 980 throw new RuntimeException( 981 "Could not get 1 subelement of transforms in " + filename); 982 } 983 // key doesn't matter. 984 // move subitem up 985 out = s[0].getValue().getAsJsonObject(); 986 final Entry<String, JsonElement>[] s2 = 987 out.entrySet().toArray(new Entry[0]); 988 if (s2 == null || s2.length != 1) { 989 throw new RuntimeException( 990 "Could not get 1 sub-subelement of transforms in " + filename); 991 } 992 // move sub-subitem up. 993 out = s2[0].getValue().getAsJsonObject(); 994 } 995 996 // write JSON 997 try (PrintWriter outf = FileUtilities.openUTF8Writer(outputDir, outFilename)) { 998 outf.println(gson.toJson(out)); 999 } 1000 1001 String outPath = 1002 new File(outputDir.substring(this.outputDir.length()), outFilename) 1003 .getPath(); 1004 outputProgress.add( 1005 Pair.of(String.format("%20s %s", js.section, outPath), valueCount)); 1006 logger.fine( 1007 ">" 1008 + progressPrefix(readCount, totalCount, filename, js.section) 1009 + String.format("…%s (%d values)", outPath, valueCount)); 1010 1011 totalItemsInFile += valueCount; 1012 } 1013 } 1014 } // this is the only normal output with debug off 1015 StringBuilder outStr = new StringBuilder(); 1016 if (!outputProgress.isEmpty()) { 1017 // Put these first, so the percent is at the end. 1018 for (final Pair<String, Integer> outputItem : outputProgress) { 1019 outStr.append( 1020 String.format("\t%6d %s\n", outputItem.getSecond(), outputItem.getFirst())); 1021 } 1022 outStr.append( 1023 String.format( 1024 "%s%-12s\t %s\n", 1025 progressPrefix(readCount, totalCount), 1026 filename, 1027 valueSectionsFormat(totalItemsInFile, outputProgress.size()))); 1028 } else { 1029 outStr.append( 1030 String.format( 1031 "%s%-12s\t" + NONE_ICON + " (no output)\n", 1032 progressPrefix(readCount, totalCount), 1033 filename)); 1034 } 1035 synchronized (readCount) { // to prevent interleaved output 1036 System.out.print(outStr); 1037 } 1038 return totalItemsInFile; 1039 } 1040 1041 /** 1042 * Fixup an XPathParts with a specific transform element 1043 * 1044 * @param xpp the XPathParts to modify 1045 * @param attribute the attribute name, such as "alias" 1046 */ fixTransformPath(final XPathParts xpp, final String attribute)1047 private static final void fixTransformPath(final XPathParts xpp, final String attribute) { 1048 final String v = xpp.getAttributeValue(-2, attribute); // on penultimate element 1049 if (v == null) return; 1050 final Set<String> aliases = new HashSet<>(); 1051 final Set<String> bcpAliases = new HashSet<>(); 1052 for (final String s : v.split(" ")) { 1053 final String q = Locale.forLanguageTag(s).toLanguageTag(); 1054 if (s.equals(q)) { 1055 // bcp47 round trips- add to bcp list 1056 bcpAliases.add(s); 1057 } else { 1058 // different - add to other aliases. 1059 aliases.add(s); 1060 } 1061 } 1062 if (aliases.isEmpty()) { 1063 xpp.removeAttribute(-2, attribute); 1064 } else { 1065 xpp.setAttribute(-2, attribute, String.join(" ", aliases.toArray(new String[0]))); 1066 } 1067 if (bcpAliases.isEmpty()) { 1068 xpp.removeAttribute(-2, attribute + "Bcp47"); 1069 } else { 1070 xpp.setAttribute( 1071 -2, attribute + "Bcp47", String.join(" ", bcpAliases.toArray(new String[0]))); 1072 } 1073 } 1074 1075 /** 1076 * Fixup a transform path, expanding the alias and backwardAlias into bcp47 and non-bcp47 1077 * attributes. 1078 */ fixTransformPath(final String path)1079 private static final String fixTransformPath(final String path) { 1080 final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed(); 1081 fixTransformPath(xpp, "alias"); 1082 fixTransformPath(xpp, "backwardAlias"); 1083 return xpp.toString(); 1084 } 1085 valueSectionsFormat(int values, int sections)1086 private static String valueSectionsFormat(int values, int sections) { 1087 return MessageFormat.format( 1088 "({0, plural, one {# value} other {# values}} in {1, plural, one {# section} other {# sections}})", 1089 values, 1090 sections); 1091 } 1092 localeIsModernTier(String filename)1093 private boolean localeIsModernTier(String filename) { 1094 Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename); 1095 if (lev == null) return false; 1096 return lev.isAtLeast(Level.MODERN); 1097 } 1098 localeIsBasicTier(String filename)1099 private boolean localeIsBasicTier(String filename) { 1100 Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename); 1101 if (lev == null) return false; 1102 return lev.isAtLeast(Level.BASIC); 1103 } 1104 1105 /** 1106 * Entire xpaths and random short strings are passed through this function. Not really Locale ID 1107 * to Language Tag. 1108 * 1109 * @param filename 1110 * @return 1111 */ underscoreToHypen(String filename)1112 private String underscoreToHypen(String filename) { 1113 return filename.replaceAll("_", "-"); 1114 } 1115 1116 /** 1117 * Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or 1118 * data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia) 1119 * 1120 * @param locale 1121 * @return 1122 */ unicodeLocaleToString(String locale)1123 private final String unicodeLocaleToString(String locale) { 1124 if (strictBcp47) { 1125 return CLDRLocale.toLanguageTag(locale); 1126 } else { 1127 return underscoreToHypen(locale); 1128 } 1129 } 1130 1131 Pattern IS_REGION_CODE = PatternCache.get("([A-Z][A-Z])|([0-9][0-9][0-9])"); 1132 /** 1133 * Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or 1134 * data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia) 1135 * Differs from unicodeLocaleToString in that it will preserve all uppercase region ids 1136 * 1137 * @param locale 1138 * @return 1139 */ unicodeLocaleMishMashToString(String locale)1140 private final String unicodeLocaleMishMashToString(String locale) { 1141 if (strictBcp47) { 1142 if (IS_REGION_CODE.matcher(locale).matches()) { 1143 return locale; 1144 } else { 1145 return CLDRLocale.toLanguageTag(locale); 1146 } 1147 } else { 1148 return underscoreToHypen(locale); 1149 } 1150 } 1151 1152 /** 1153 * Fixup a path to be BCP47 compliant 1154 * 1155 * @param path XPath (usually ends in elementName, but not necessarily) 1156 * @param elementName element to fixup 1157 * @param attributeNames list of attributes to fix 1158 * @return new path 1159 */ fixXpathBcp47(final String path, String elementName, String... attributeNames)1160 final String fixXpathBcp47(final String path, String elementName, String... attributeNames) { 1161 final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed(); 1162 for (final String attributeName : attributeNames) { 1163 final String oldValue = xpp.findAttributeValue(elementName, attributeName); 1164 if (oldValue == null) continue; 1165 final String oldValues[] = oldValue.split(" "); 1166 String newValue = 1167 Arrays.stream(oldValues) 1168 .map((String s) -> unicodeLocaleToString(s)) 1169 .collect(Collectors.joining(" ")); 1170 if (!oldValue.equals(newValue)) { 1171 xpp.setAttribute(elementName, attributeName, newValue); 1172 logger.finest(attributeName + " = " + oldValue + " -> " + newValue); 1173 } 1174 } 1175 return xpp.toString(); 1176 } 1177 1178 /** 1179 * Fixup a path to be BCP47 compliant …but support a mishmash of regions and locale ids 1180 * CLDR-15069 1181 * 1182 * @param path XPath (usually ends in elementName, but not necessarily) 1183 * @param elementName element to fixup 1184 * @param attributeNames list of attributes to fix 1185 * @return new path 1186 */ fixXpathBcp47MishMash( final String path, String elementName, String... attributeNames)1187 final String fixXpathBcp47MishMash( 1188 final String path, String elementName, String... attributeNames) { 1189 final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed(); 1190 for (final String attributeName : attributeNames) { 1191 final String oldValue = xpp.findAttributeValue(elementName, attributeName); 1192 if (oldValue == null) continue; 1193 final String oldValues[] = oldValue.split(" "); 1194 String newValue = 1195 Arrays.stream(oldValues) 1196 .map((String s) -> unicodeLocaleMishMashToString(s)) 1197 .collect(Collectors.joining(" ")); 1198 if (!oldValue.equals(newValue)) { 1199 xpp.setAttribute(elementName, attributeName, newValue); 1200 logger.finest(attributeName + " = " + oldValue + " -> " + newValue); 1201 } 1202 } 1203 return xpp.toString(); 1204 } 1205 outputUnitPreferenceData( JSONSection js, List<CldrItem> theItems, JsonObject out, ArrayList<CldrNode> nodesForLastItem)1206 private void outputUnitPreferenceData( 1207 JSONSection js, 1208 List<CldrItem> theItems, 1209 JsonObject out, 1210 ArrayList<CldrNode> nodesForLastItem) 1211 throws ParseException, IOException { 1212 // handle these specially. 1213 // redo earlier loop somewhat. 1214 CldrNode supplementalNode = CldrNode.createNode("cldr", "supplemental", "supplemental"); 1215 JsonElement supplementalObject = startNonleafNode(out, supplementalNode); 1216 CldrNode unitPrefNode = CldrNode.createNode("supplemental", js.section, js.section); 1217 final JsonElement o = startNonleafNode(supplementalObject, unitPrefNode); 1218 1219 // We'll directly write to 'out' 1220 1221 // Unit preference sorting is a bit more complicated, so we're going to use the CldrItems, 1222 // but collect the results more directly. 1223 1224 Map<Pair<String, String>, Map<String, List<CldrItem>>> catUsagetoRegionItems = 1225 new TreeMap<>(); 1226 1227 for (CldrItem item : theItems) { 1228 if (!item.getUntransformedPath().contains("unitPref")) { 1229 continue; 1230 } 1231 CldrItem[] items = item.split(); 1232 if (items == null) { 1233 throw new IllegalArgumentException("expected unit pref to split: " + item); 1234 } 1235 for (final CldrItem subItem : items) { 1236 // step 1: make sure the category/usage is there 1237 final XPathParts xpp = XPathParts.getFrozenInstance(subItem.getPath()); 1238 final String category = xpp.findFirstAttributeValue("category"); 1239 final String usage = xpp.findFirstAttributeValue("usage"); 1240 final String region = 1241 xpp.findFirstAttributeValue("regions"); // actually one region (split) 1242 Pair<String, String> key = Pair.of(category, usage); 1243 Map<String, List<CldrItem>> regionMap = 1244 catUsagetoRegionItems.computeIfAbsent(key, ignored -> new TreeMap<>()); 1245 List<CldrItem> perRegion = 1246 regionMap.computeIfAbsent(region, ignored -> new ArrayList<>()); 1247 perRegion.add(subItem); 1248 } 1249 } 1250 1251 // OK, now start outputting 1252 // Traverse categories/usage/regions 1253 // unitPreferenceData is already open { 1254 catUsagetoRegionItems.keySet().stream() 1255 .map(p -> p.getFirst()) 1256 .distinct() // for each category 1257 .forEach( 1258 category -> { 1259 JsonObject oo = new JsonObject(); 1260 o.getAsJsonObject().add(category, oo); 1261 1262 catUsagetoRegionItems.entrySet().stream() 1263 .filter(p -> p.getKey().getFirst().equals(category)) 1264 .forEach( 1265 ent -> { 1266 final String usage = ent.getKey().getSecond(); 1267 JsonObject ooo = new JsonObject(); 1268 oo.getAsJsonObject().add(usage, ooo); 1269 1270 ent.getValue() 1271 .forEach( 1272 (region, list) -> { 1273 JsonArray array = 1274 new JsonArray(); 1275 ooo.getAsJsonObject() 1276 .add(region, array); 1277 list.forEach( 1278 item -> { 1279 final XPathParts 1280 xpp = 1281 XPathParts 1282 .getFrozenInstance( 1283 item 1284 .getPath()); 1285 JsonObject u = 1286 new JsonObject(); 1287 array.add(u); 1288 u.addProperty( 1289 "unit", 1290 item 1291 .getValue()); 1292 if (xpp 1293 .containsAttribute( 1294 "geq")) { 1295 u.addProperty( 1296 "geq", 1297 Double 1298 .parseDouble( 1299 xpp 1300 .findFirstAttributeValue( 1301 "geq"))); 1302 } 1303 }); 1304 }); 1305 }); 1306 }); 1307 1308 // Computer, switch to 'automatic' navigation 1309 // We'll let closeNodes take over. 1310 nodesForLastItem.add(unitPrefNode); // unitPreferenceData } 1311 } 1312 1313 /** 1314 * Creates the packaging files ( i.e. package.json ) for a particular package 1315 * 1316 * @param packageName The name of the installable package 1317 */ writePackagingFiles(String outputDir, String packageName)1318 public void writePackagingFiles(String outputDir, String packageName) throws IOException { 1319 File dir = new File(outputDir.toString()); 1320 if (!dir.exists()) { 1321 dir.mkdirs(); 1322 } 1323 writePackageJson(outputDir, packageName); 1324 writeBowerJson(outputDir, packageName); 1325 writeReadme(outputDir, packageName); 1326 } 1327 1328 /** Write the ## License section */ writeCopyrightSection(PrintWriter out)1329 public void writeCopyrightSection(PrintWriter out) { 1330 out.println( 1331 CldrUtility.getCopyrightMarkdown() 1332 + "\n" 1333 + "A copy of the license is included as [LICENSE](./LICENSE)."); 1334 } 1335 1336 /** 1337 * Write the readme fragment from cldr-json-readme.md plus the copyright 1338 * 1339 * @param outf 1340 * @throws IOException 1341 */ writeReadmeSection(PrintWriter outf)1342 private void writeReadmeSection(PrintWriter outf) throws IOException { 1343 FileCopier.copy(CldrUtility.getUTF8Data("cldr-json-readme.md"), outf); 1344 outf.println(); 1345 writeCopyrightSection(outf); 1346 } 1347 writeReadme(String outputDir, String packageName)1348 public void writeReadme(String outputDir, String packageName) throws IOException { 1349 final String basePackageName = getBasePackageName(packageName); 1350 try (PrintWriter outf = 1351 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "README.md"); ) { 1352 outf.println("# " + packageName); 1353 outf.println(); 1354 outf.println(configFileReader.getPackageDescriptions().get(basePackageName)); 1355 outf.println(); 1356 if (packageName.endsWith(FULL_TIER_SUFFIX)) { 1357 outf.println("This package contains all locales."); 1358 outf.println(); 1359 } else if (packageName.endsWith(MODERN_TIER_SUFFIX)) { 1360 outf.println( 1361 "**Deprecated** This package contains only the set of locales listed as modern coverage. Use `" 1362 + CLDR_PKG_PREFIX 1363 + basePackageName 1364 + FULL_TIER_SUFFIX 1365 + "` and locale coverage data instead. The -modern packages are scheduled to be removed in v46, see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465)."); 1366 outf.println(); 1367 } 1368 outf.println(); 1369 outf.println(getNpmBadge(packageName)); 1370 outf.println(); 1371 writeReadmeSection(outf); 1372 } 1373 try (PrintWriter outf = 1374 FileUtilities.openUTF8Writer( 1375 outputDir + "/" + packageName, CldrUtility.LICENSE); ) { 1376 if (licenseFile.isPresent()) { 1377 try (BufferedReader br = FileUtilities.openUTF8Reader("", licenseFile.get()); ) { 1378 FileCopier.copy(br, outf); 1379 } 1380 } else { 1381 FileCopier.copy(CldrUtility.getUTF8Data(CldrUtility.LICENSE), outf); 1382 } 1383 } 1384 } 1385 getBasePackageName(final String packageName)1386 String getBasePackageName(final String packageName) { 1387 String basePackageName = packageName; 1388 if (basePackageName.startsWith(CLDR_PKG_PREFIX)) { 1389 basePackageName = basePackageName.substring(CLDR_PKG_PREFIX.length()); 1390 } 1391 if (basePackageName.endsWith(FULL_TIER_SUFFIX)) { 1392 basePackageName = 1393 basePackageName.substring( 1394 0, basePackageName.length() - FULL_TIER_SUFFIX.length()); 1395 } else if (basePackageName.endsWith(MODERN_TIER_SUFFIX)) { 1396 basePackageName = 1397 basePackageName.substring( 1398 0, basePackageName.length() - MODERN_TIER_SUFFIX.length()); 1399 } 1400 return basePackageName; 1401 } 1402 writeBasicInfo(JsonObject obj, String packageName, boolean isNPM)1403 public void writeBasicInfo(JsonObject obj, String packageName, boolean isNPM) { 1404 obj.addProperty("name", packageName); 1405 obj.addProperty("version", pkgVersion); 1406 1407 String[] packageNameParts = packageName.split("-"); 1408 String dependency = dependencies.get(packageNameParts[1]); 1409 if (dependency != null) { 1410 String[] dependentPackageNames = new String[1]; 1411 String tier = packageNameParts[packageNameParts.length - 1]; 1412 if (dependency.equals("core") || dependency.equals("bcp47")) { 1413 dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency; 1414 } else { 1415 dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency + "-" + tier; 1416 } 1417 1418 JsonObject dependencies = new JsonObject(); 1419 for (String dependentPackageName : dependentPackageNames) { 1420 if (dependentPackageName != null) { 1421 dependencies.addProperty(dependentPackageName, pkgVersion); 1422 } 1423 } 1424 obj.add(isNPM ? "peerDependencies" : "dependencies", dependencies); 1425 } 1426 } 1427 1428 /** 1429 * Default for version string 1430 * 1431 * @return 1432 */ getDefaultVersion()1433 private static String getDefaultVersion() { 1434 String versionString = CLDRFile.GEN_VERSION; 1435 while (versionString.split("\\.").length < 3) { 1436 versionString = versionString + ".0"; 1437 } 1438 return versionString; 1439 } 1440 writePackageJson(String outputDir, String packageName)1441 public void writePackageJson(String outputDir, String packageName) throws IOException { 1442 PrintWriter outf = 1443 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "package.json"); 1444 logger.fine( 1445 PACKAGE_ICON 1446 + " Creating packaging file => " 1447 + outputDir 1448 + File.separator 1449 + packageName 1450 + File.separator 1451 + "package.json"); 1452 JsonObject obj = new JsonObject(); 1453 writeBasicInfo(obj, packageName, true); 1454 1455 JsonArray maintainers = new JsonArray(); 1456 JsonObject primaryMaintainer = new JsonObject(); 1457 JsonObject secondaryMaintainer = new JsonObject(); 1458 1459 final String basePackageName = getBasePackageName(packageName); 1460 String description = configFileReader.getPackageDescriptions().get(basePackageName); 1461 if (packageName.endsWith(MODERN_TIER_SUFFIX)) { 1462 description = description + " (modern only: deprecated)"; 1463 } 1464 obj.addProperty("description", description); 1465 1466 obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE); 1467 obj.addProperty("author", CLDRURLS.UNICODE_CONSORTIUM); 1468 1469 primaryMaintainer.addProperty("name", "Steven R. Loomis"); 1470 primaryMaintainer.addProperty("email", "srloomis@unicode.org"); 1471 1472 maintainers.add(primaryMaintainer); 1473 1474 secondaryMaintainer.addProperty("name", "John Emmons"); 1475 secondaryMaintainer.addProperty("email", "emmo@us.ibm.com"); 1476 secondaryMaintainer.addProperty("url", "https://github.com/JCEmmons"); 1477 1478 maintainers.add(secondaryMaintainer); 1479 obj.add("maintainers", maintainers); 1480 1481 JsonObject repository = new JsonObject(); 1482 repository.addProperty("type", "git"); 1483 repository.addProperty("url", "git://github.com/unicode-cldr/cldr-json.git"); 1484 obj.add("repository", repository); 1485 1486 obj.addProperty("license", CLDRURLS.UNICODE_SPDX); 1487 obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL); 1488 1489 final SupplementalDataInfo sdi = CLDRConfig.getInstance().getSupplementalDataInfo(); 1490 obj.addProperty("cldrVersion", sdi.getCldrVersionString()); 1491 obj.addProperty("unicodeVersion", sdi.getUnicodeVersionString()); 1492 1493 outf.println(gson.toJson(obj)); 1494 outf.close(); 1495 } 1496 writeBowerJson(String outputDir, String packageName)1497 public void writeBowerJson(String outputDir, String packageName) throws IOException { 1498 PrintWriter outf = 1499 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "bower.json"); 1500 logger.fine( 1501 PACKAGE_ICON 1502 + " Creating packaging file => " 1503 + outputDir 1504 + File.separator 1505 + packageName 1506 + File.separator 1507 + "bower.json"); 1508 JsonObject obj = new JsonObject(); 1509 writeBasicInfo(obj, packageName, false); 1510 if (type == RunType.supplemental) { 1511 JsonArray mainPaths = new JsonArray(); 1512 mainPaths.add(new JsonPrimitive("availableLocales.json")); 1513 mainPaths.add(new JsonPrimitive("defaultContent.json")); // Handled specially 1514 mainPaths.add(new JsonPrimitive("scriptMetadata.json")); 1515 mainPaths.add(new JsonPrimitive(type.toString() + "/*.json")); 1516 obj.add("main", mainPaths); 1517 } else if (type == RunType.rbnf) { 1518 obj.addProperty("main", type.toString() + "/*.json"); 1519 } else { 1520 obj.addProperty("main", type.toString() + "/**/*.json"); 1521 } 1522 1523 JsonArray ignorePaths = new JsonArray(); 1524 ignorePaths.add(new JsonPrimitive(".gitattributes")); 1525 ignorePaths.add(new JsonPrimitive("README.md")); 1526 obj.add("ignore", ignorePaths); 1527 obj.addProperty("license", CLDRURLS.UNICODE_SPDX); 1528 1529 outf.println(gson.toJson(obj)); 1530 outf.close(); 1531 } 1532 writeDefaultContent(String outputDir)1533 public void writeDefaultContent(String outputDir) throws IOException { 1534 PrintWriter outf = 1535 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "defaultContent.json"); 1536 System.out.println( 1537 PACKAGE_ICON 1538 + " Creating packaging file => " 1539 + outputDir 1540 + "/cldr-core" 1541 + File.separator 1542 + "defaultContent.json"); 1543 JsonObject obj = new JsonObject(); 1544 obj.add("defaultContent", gson.toJsonTree(skippedDefaultContentLocales)); 1545 outf.println(gson.toJson(obj)); 1546 outf.close(); 1547 } 1548 writeTransformMetadata(String outputDir)1549 public void writeTransformMetadata(String outputDir) throws IOException { 1550 final String dirName = outputDir + "/cldr-" + RunType.transforms.name(); 1551 final String fileName = RunType.transforms.name() + ".json"; 1552 PrintWriter outf = FileUtilities.openUTF8Writer(dirName, fileName); 1553 System.out.println( 1554 PACKAGE_ICON 1555 + " Creating packaging file => " 1556 + dirName 1557 + File.separator 1558 + fileName); 1559 JsonObject obj = new JsonObject(); 1560 obj.add( 1561 RunType.transforms.name(), 1562 gson.toJsonTree(CLDRTransforms.getInstance().getJsonIndex())); 1563 outf.println(gson.toJson(obj)); 1564 outf.close(); 1565 } 1566 writeCoverageLevels(String outputDir)1567 public void writeCoverageLevels(String outputDir) throws IOException { 1568 try (PrintWriter outf = 1569 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "coverageLevels.json"); ) { 1570 final Map<String, String> covlocs = new TreeMap<>(); 1571 System.out.println( 1572 PACKAGE_ICON 1573 + " Creating packaging file => " 1574 + outputDir 1575 + "/cldr-core" 1576 + File.separator 1577 + "coverageLevels.json from coverageLevels.txt"); 1578 CalculatedCoverageLevels ccl = CalculatedCoverageLevels.getInstance(); 1579 for (final Map.Entry<String, org.unicode.cldr.util.Level> e : 1580 ccl.getLevels().entrySet()) { 1581 final String uloc = e.getKey(); 1582 final String level = e.getValue().name().toLowerCase(); 1583 final String bcp47loc = unicodeLocaleToString(uloc); 1584 if (covlocs.put(bcp47loc, level) != null) { 1585 throw new IllegalArgumentException( 1586 "coverageLevels.txt: duplicate locale " + bcp47loc); 1587 } 1588 } 1589 final Map<String, String> effectiveCovlocs = new TreeMap<>(); 1590 avl.full.forEach( 1591 loc -> { 1592 final String uloc = ULocale.forLanguageTag(loc).toString(); 1593 final Level lev = ccl.getEffectiveCoverageLevel(uloc); 1594 if (lev != null) { 1595 effectiveCovlocs.put(loc, lev.name().toLowerCase()); 1596 } 1597 }); 1598 JsonObject obj = new JsonObject(); 1599 // exactly what is in CLDR .txt file 1600 obj.add("coverageLevels", gson.toJsonTree(covlocs)); 1601 1602 // resolved, including all available locales 1603 obj.add("effectiveCoverageLevels", gson.toJsonTree(effectiveCovlocs)); 1604 outf.println(gson.toJson(obj)); 1605 } 1606 } 1607 writeAvailableLocales(String outputDir)1608 public void writeAvailableLocales(String outputDir) throws IOException { 1609 PrintWriter outf = 1610 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "availableLocales.json"); 1611 System.out.println( 1612 PACKAGE_ICON 1613 + " Creating packaging file => " 1614 + outputDir 1615 + "/cldr-core" 1616 + File.separator 1617 + "availableLocales.json"); 1618 JsonObject obj = new JsonObject(); 1619 obj.add("availableLocales", gson.toJsonTree(avl)); 1620 outf.println(gson.toJson(obj)); 1621 outf.close(); 1622 } 1623 writeScriptMetadata(String outputDir)1624 public void writeScriptMetadata(String outputDir) throws IOException { 1625 PrintWriter outf = 1626 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "scriptMetadata.json"); 1627 System.out.println( 1628 "Creating script metadata file => " 1629 + outputDir 1630 + File.separator 1631 + "cldr-core" 1632 + File.separator 1633 + "scriptMetadata.json"); 1634 Map<String, Info> scriptInfo = new TreeMap<>(); 1635 for (String script : ScriptMetadata.getScripts()) { 1636 Info i = ScriptMetadata.getInfo(script); 1637 scriptInfo.put(script, i); 1638 } 1639 if (ScriptMetadata.errors.size() > 0) { 1640 System.err.println(Joiner.on("\n\t").join(ScriptMetadata.errors)); 1641 // throw new IllegalArgumentException(); 1642 } 1643 1644 JsonObject obj = new JsonObject(); 1645 obj.add("scriptMetadata", gson.toJsonTree(scriptInfo)); 1646 outf.println(gson.toJson(obj)); 1647 outf.close(); 1648 } 1649 writePackageList(String outputDir)1650 public void writePackageList(String outputDir) throws IOException { 1651 final boolean includeModern = Boolean.parseBoolean(options.get("Modern").getValue()); 1652 PrintWriter outf = 1653 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "cldr-packages.json"); 1654 System.out.println( 1655 PACKAGE_ICON 1656 + " Creating packaging metadata file => " 1657 + outputDir 1658 + File.separator 1659 + "cldr-core" 1660 + File.separator 1661 + "cldr-packages.json and PACKAGES.md"); 1662 PrintWriter pkgs = FileUtilities.openUTF8Writer(outputDir + "/..", "PACKAGES.md"); 1663 1664 pkgs.println("# CLDR JSON Packages"); 1665 pkgs.println(); 1666 1667 LdmlConfigFileReader uberReader = new LdmlConfigFileReader(); 1668 1669 for (RunType r : RunType.values()) { 1670 if (r == RunType.all) continue; 1671 uberReader.read(null, r); 1672 } 1673 1674 TreeMap<String, String> pkgsToDesc = new TreeMap<>(); 1675 1676 JsonObject obj = new JsonObject(); 1677 obj.addProperty("license", CLDRURLS.UNICODE_SPDX); 1678 obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL); 1679 obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE); 1680 obj.addProperty("version", pkgVersion); 1681 1682 JsonArray packages = new JsonArray(); 1683 for (Map.Entry<String, String> e : uberReader.getPackageDescriptions().entrySet()) { 1684 final String baseName = e.getKey(); 1685 1686 if (baseName.equals("IGNORE") || baseName.equals("cal")) continue; 1687 if (baseName.equals("core") || baseName.equals("rbnf") || baseName.equals("bcp47")) { 1688 JsonObject packageEntry = new JsonObject(); 1689 packageEntry.addProperty("description", e.getValue()); 1690 packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName); 1691 packages.add(packageEntry); 1692 pkgsToDesc.put( 1693 packageEntry.get("name").getAsString(), 1694 packageEntry.get("description").getAsString()); 1695 } else { 1696 { 1697 JsonObject packageEntry = new JsonObject(); 1698 packageEntry.addProperty("description", e.getValue()); 1699 packageEntry.addProperty("tier", "full"); 1700 packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName + FULL_TIER_SUFFIX); 1701 packages.add(packageEntry); 1702 pkgsToDesc.put( 1703 packageEntry.get("name").getAsString(), 1704 packageEntry.get("description").getAsString()); 1705 } 1706 if (includeModern) { 1707 JsonObject packageEntry = new JsonObject(); 1708 packageEntry.addProperty("description", e.getValue() + " modern (deprecated)"); 1709 packageEntry.addProperty("tier", "modern"); 1710 packageEntry.addProperty( 1711 "name", CLDR_PKG_PREFIX + baseName + MODERN_TIER_SUFFIX); 1712 packages.add(packageEntry); 1713 pkgsToDesc.put( 1714 packageEntry.get("name").getAsString(), 1715 packageEntry.get("description").getAsString()); 1716 } 1717 } 1718 } 1719 pkgs.println(); 1720 for (Map.Entry<String, String> e : pkgsToDesc.entrySet()) { 1721 pkgs.println("### [" + e.getKey() + "](./cldr-json/" + e.getKey() + "/)"); 1722 pkgs.println(); 1723 if (e.getKey().contains("-modern")) { 1724 pkgs.println( 1725 " - **Note: Deprecated** see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465)."); 1726 } 1727 pkgs.println(" - " + e.getValue()); 1728 pkgs.println(" - " + getNpmBadge(e.getKey())); 1729 pkgs.println(); 1730 } 1731 obj.add("packages", packages); 1732 outf.println(gson.toJson(obj)); 1733 outf.close(); 1734 pkgs.println("## JSON Metadata"); 1735 pkgs.println(); 1736 pkgs.println( 1737 "Package metadata is available at [`cldr-core`/cldr-packages.json](./cldr-json/cldr-core/cldr-packages.json)"); 1738 pkgs.println(); 1739 1740 writeReadmeSection(pkgs); 1741 pkgs.close(); 1742 } 1743 getNpmBadge(final String packageName)1744 private String getNpmBadge(final String packageName) { 1745 return String.format( 1746 "[](https://www.npmjs.org/package/%s)", 1747 packageName, packageName); 1748 } 1749 1750 /** 1751 * Process the pending sorting items. 1752 * 1753 * @param out The ArrayList to hold all output lines. 1754 * @param nodesForLastItem All the nodes from last item. 1755 * @param sortingItems The item list that should be sorted before output. 1756 * @throws IOException 1757 * @throws ParseException 1758 */ resolveSortingItems( JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems)1759 private void resolveSortingItems( 1760 JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems) 1761 throws IOException, ParseException { 1762 ArrayList<CldrItem> arrayItems = new ArrayList<>(); 1763 String lastLeadingArrayItemPath = null; 1764 1765 if (!sortingItems.isEmpty()) { 1766 Collections.sort(sortingItems); 1767 for (CldrItem item : sortingItems) { 1768 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath()); 1769 if (matcher.matches()) { 1770 String leadingArrayItemPath = matcher.group(1); 1771 if (lastLeadingArrayItemPath != null 1772 && !lastLeadingArrayItemPath.equals(leadingArrayItemPath)) { 1773 resolveArrayItems(out, nodesForLastItem, arrayItems); 1774 } 1775 lastLeadingArrayItemPath = leadingArrayItemPath; 1776 arrayItems.add(item); 1777 } else { 1778 outputCldrItem(out, nodesForLastItem, item); 1779 } 1780 } 1781 sortingItems.clear(); 1782 resolveArrayItems(out, nodesForLastItem, arrayItems); 1783 } 1784 } 1785 1786 /** 1787 * Process the pending array items. 1788 * 1789 * @param out The ArrayList to hold all output lines. 1790 * @param nodesForLastItem All the nodes from last item. 1791 * @param arrayItems The item list that should be output as array. 1792 * @throws IOException 1793 * @throws ParseException 1794 */ resolveArrayItems( JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems)1795 private void resolveArrayItems( 1796 JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems) 1797 throws IOException, ParseException { 1798 if (!arrayItems.isEmpty()) { 1799 CldrItem firstItem = arrayItems.get(0); 1800 if (firstItem.needsSort()) { 1801 Collections.sort(arrayItems); 1802 firstItem = arrayItems.get(0); 1803 } 1804 1805 int arrayLevel = getArrayIndentLevel(firstItem); // only used for trim 1806 1807 JsonArray array = outputStartArray(out, nodesForLastItem, firstItem, arrayLevel); 1808 1809 // Previous statement closed for first element, trim nodesForLastItem 1810 // so that it will not happen again inside. 1811 int len = nodesForLastItem.size(); 1812 while (len > arrayLevel) { 1813 nodesForLastItem.remove(len - 1); 1814 len--; 1815 } 1816 for (CldrItem insideItem : arrayItems) { 1817 outputArrayItem(array, insideItem, nodesForLastItem, arrayLevel); 1818 } 1819 arrayItems.clear(); 1820 1821 int lastLevel = nodesForLastItem.size() - 1; 1822 // closeNodes(out, lastLevel, arrayLevel); 1823 // out.endArray(); 1824 for (int i = arrayLevel - 1; i < lastLevel; i++) { 1825 nodesForLastItem.remove(i); 1826 } 1827 } 1828 } 1829 1830 /** 1831 * Find the indent level on which array should be inserted. 1832 * 1833 * @param item The CldrItem being examined. 1834 * @return The array indent level. 1835 * @throws ParseException 1836 */ getArrayIndentLevel(CldrItem item)1837 private int getArrayIndentLevel(CldrItem item) throws ParseException { 1838 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath()); 1839 if (!matcher.matches()) { 1840 System.out.println("No match found for " + item.getPath() + ", this shouldn't happen."); 1841 return 0; 1842 } 1843 1844 String leadingPath = matcher.group(1); 1845 CldrItem fakeItem = new CldrItem(leadingPath, leadingPath, leadingPath, leadingPath, ""); 1846 return fakeItem.getNodesInPath().size() - 1; 1847 } 1848 1849 /** 1850 * Write the start of an array. 1851 * 1852 * @param out The root object 1853 * @param nodesForLastItem Nodes in path for last CldrItem. 1854 * @param item The CldrItem to be processed. 1855 * @param arrayLevel The level on which array is laid out. 1856 * @throws IOException 1857 * @throws ParseException 1858 */ outputStartArray( JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)1859 private JsonArray outputStartArray( 1860 JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel) 1861 throws IOException, ParseException { 1862 1863 ArrayList<CldrNode> nodesInPath = item.getNodesInPath(); 1864 1865 JsonElement o = out; 1866 1867 // final CldrNode last = nodesInPath.get(nodesInPath.size()-1); 1868 1869 // Output nodes up to parent of 'arrayLevel' 1870 for (int i = 1; i < arrayLevel - 1; i++) { 1871 final CldrNode node = nodesInPath.get(i); 1872 o = startNonleafNode(o, node); 1873 } 1874 1875 // at arrayLevel, we have a named Array. 1876 // Get the name of the parent of the array 1877 String objName = nodesInPath.get(arrayLevel - 1).getNodeKeyName(); 1878 JsonArray array = new JsonArray(); 1879 o.getAsJsonObject().add(objName, array); 1880 1881 return array; 1882 } 1883 1884 /** 1885 * Write a CLDR item to file. 1886 * 1887 * <p>"usesMetazone" will be checked to see if it is current. Those non-current item will be 1888 * dropped. 1889 * 1890 * @param out The ArrayList to hold all output lines. 1891 * @param nodesForLastItem 1892 * @param item The CldrItem to be processed. 1893 * @throws IOException 1894 * @throws ParseException 1895 */ outputCldrItem(JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item)1896 private void outputCldrItem(JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item) 1897 throws IOException, ParseException { 1898 // alias has been resolved, no need to keep it. 1899 if (item.isAliasItem()) { 1900 return; 1901 } 1902 1903 ArrayList<CldrNode> nodesInPath = item.getNodesInPath(); 1904 int arraySize = nodesInPath.size(); 1905 1906 int i = 0; 1907 if (i == nodesInPath.size() && type != RunType.rbnf) { 1908 System.err.println( 1909 "This nodes and last nodes has identical path. (" 1910 + item.getPath() 1911 + ") Some distinguishing attributes wrongly removed?"); 1912 return; 1913 } 1914 1915 // close previous nodes 1916 // closeNodes(out, nodesForLastItem.size() - 2, i); 1917 JsonElement o = out; 1918 for (; i < nodesInPath.size() - 1; ++i) { 1919 o = startNonleafNode(o, nodesInPath.get(i)); 1920 } 1921 1922 writeLeafNode(o, nodesInPath.get(i), item.getValue()); 1923 nodesForLastItem.clear(); 1924 nodesForLastItem.addAll(nodesInPath); 1925 } 1926 1927 /** 1928 * Start a non-leaf node, adding it if not there. 1929 * 1930 * @param out The input JsonObject 1931 * @param node The node being written. 1932 * @throws IOException 1933 */ startNonleafNode(JsonElement out, final CldrNode node)1934 private JsonElement startNonleafNode(JsonElement out, final CldrNode node) throws IOException { 1935 String objName = node.getNodeKeyName(); 1936 // Some node should be skipped as indicated by objName being null. 1937 logger.finest(() -> "objName= " + objName + " for path " + node.getUntransformedPath()); 1938 if (objName == null 1939 || objName.equals("cldr") 1940 || objName.equals("ldmlBCP47")) { // Skip root 'cldr' node 1941 return out; 1942 } 1943 1944 Map<String, String> attrAsValueMap = node.getAttrAsValueMap(); 1945 1946 String name; 1947 1948 if (type == RunType.annotations || type == RunType.annotationsDerived) { 1949 if (objName.startsWith("U+")) { 1950 // parse U+22 -> " etc 1951 name = (com.ibm.icu.text.UTF16.valueOf(Integer.parseInt(objName.substring(2), 16))); 1952 } else { 1953 name = (objName); 1954 } 1955 } else { 1956 name = (objName); 1957 } 1958 1959 JsonElement o = out.getAsJsonObject().get(name); 1960 1961 if (o == null) { 1962 o = new JsonObject(); 1963 out.getAsJsonObject().add(name, o); 1964 } 1965 1966 for (final String key : attrAsValueMap.keySet()) { 1967 logger.finest(() -> "Non-Leaf Node: " + node.getUntransformedPath() + " ." + key); 1968 String rawAttrValue = attrAsValueMap.get(key); 1969 String value = escapeValue(rawAttrValue); 1970 // attribute is prefixed with "_" when being used as key. 1971 String attrAsKey = "_" + key; 1972 if (LdmlConvertRules.attrIsBooleanOmitFalse( 1973 node.getUntransformedPath(), node.getName(), node.getParent(), key)) { 1974 final Boolean v = Boolean.parseBoolean(rawAttrValue); 1975 if (v) { 1976 o.getAsJsonObject().addProperty(attrAsKey, v); 1977 } // else, omit 1978 } else { 1979 // hack for localeRules 1980 if (attrAsKey.equals("_localeRules")) { 1981 // find the _localeRules object, add if it didn't exist 1982 JsonElement localeRules = out.getAsJsonObject().get(attrAsKey); 1983 if (localeRules == null) { 1984 localeRules = new JsonObject(); 1985 out.getAsJsonObject().add(attrAsKey, localeRules); 1986 } 1987 // find the sibling object, add if it did't exist ( this will be parentLocale or 1988 // collations etc.) 1989 JsonElement sibling = localeRules.getAsJsonObject().get(name); 1990 if (sibling == null) { 1991 sibling = new JsonObject(); 1992 localeRules.getAsJsonObject().add(name, sibling); 1993 } 1994 // get the 'parent' attribute, which wil be the value 1995 final String parent = 1996 XPathParts.getFrozenInstance(node.getUntransformedPath()) 1997 .getAttributeValue(-1, "parent"); 1998 // finally, we add something like "nonLikelyScript: und" 1999 sibling.getAsJsonObject().addProperty(value, parent); 2000 } else { 2001 o.getAsJsonObject().addProperty(attrAsKey, value); 2002 } 2003 } 2004 } 2005 return o; 2006 } 2007 2008 /** 2009 * Write a CLDR item to file. 2010 * 2011 * <p>"usesMetazone" will be checked to see if it is current. Those non-current item will be 2012 * dropped. 2013 * 2014 * @param out The ArrayList to hold all output lines. 2015 * @param item The CldrItem to be processed. 2016 * @param nodesForLastItem Nodes in path for last item. 2017 * @param arrayLevel The indentation level in which array exists. 2018 * @throws IOException 2019 * @throws ParseException 2020 */ outputArrayItem( JsonArray out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel)2021 private void outputArrayItem( 2022 JsonArray out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel) 2023 throws IOException, ParseException { 2024 2025 // This method is more complicated that outputCldrItem because it needs to 2026 // handle 3 different cases. 2027 // 1. When difference is found below array item, this item will be of the 2028 // same array item. Inside the array item, it is about the same as 2029 // outputCldrItem, just with one more level of indentation because of 2030 // the array. 2031 // 2. The array item is the leaf item with no attribute, simplify it as 2032 // an object with one name/value pair. 2033 // 3. The array item is the leaf item with attribute, an embedded object 2034 // will be created inside the array item object. 2035 2036 ArrayList<CldrNode> nodesInPath = item.getNodesInPath(); 2037 String value = escapeValue(item.getValue()); 2038 int nodesNum = nodesInPath.size(); 2039 2040 // case 1 2041 // int diff = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath); 2042 CldrNode cldrNode = nodesInPath.get(nodesNum - 1); 2043 2044 // if (diff > arrayLevel) { 2045 // // close previous nodes 2046 // closeNodes(out, nodesForLastItem.size() - 1, diff + 1); 2047 2048 // for (int i = diff; i < nodesNum - 1; i++) { 2049 // startNonleafNode(out, nodesInPath.get(i), i + 1); 2050 // } 2051 // writeLeafNode(out, cldrNode, value, nodesNum); 2052 // return; 2053 // } 2054 2055 if (arrayLevel == nodesNum - 1) { 2056 // case 2 2057 // close previous nodes 2058 // if (nodesForLastItem.size() - 1 - arrayLevel > 0) { 2059 // closeNodes(out, nodesForLastItem.size() - 1, arrayLevel); 2060 // } 2061 2062 String objName = cldrNode.getNodeKeyName(); 2063 int pos = objName.indexOf('-'); 2064 if (pos > 0) { 2065 objName = objName.substring(0, pos); 2066 } 2067 2068 Map<String, String> attrAsValueMap = cldrNode.getAttrAsValueMap(); 2069 2070 if (attrAsValueMap.isEmpty()) { 2071 JsonObject o = new JsonObject(); 2072 out.add(o); 2073 o.addProperty(objName, value); 2074 } else if (objName.equals("rbnfrule")) { 2075 writeRbnfLeafNode(out, item, attrAsValueMap); 2076 } else { 2077 JsonObject o = new JsonObject(); 2078 writeLeafNode( 2079 o, 2080 objName, 2081 attrAsValueMap, 2082 value, 2083 cldrNode.getName(), 2084 cldrNode.getParent(), 2085 cldrNode); 2086 out.add(o); 2087 } 2088 // the last node is closed, remove it. 2089 nodesInPath.remove(nodesNum - 1); 2090 } else { 2091 // case 3 2092 // close previous nodes 2093 // if (nodesForLastItem.size() - 1 - (arrayLevel) > 0) { 2094 // closeNodes(out, nodesForLastItem.size() - 1, arrayLevel); 2095 // } 2096 2097 JsonObject o = new JsonObject(); 2098 out.add(o); 2099 2100 CldrNode node = nodesInPath.get(arrayLevel); 2101 String objName = node.getNodeKeyName(); 2102 int pos = objName.indexOf('-'); 2103 if (pos > 0) { 2104 objName = objName.substring(0, pos); 2105 } 2106 Map<String, String> attrAsValueMap = node.getAttrAsValueMap(); 2107 JsonObject oo = new JsonObject(); 2108 o.add(objName, oo); 2109 for (String key : attrAsValueMap.keySet()) { 2110 // attribute is prefixed with "_" when being used as key. 2111 oo.addProperty("_" + key, escapeValue(attrAsValueMap.get(key))); 2112 } 2113 2114 JsonElement o2 = out; 2115 System.err.println("PROBLEM at " + cldrNode.getUntransformedPath()); 2116 // TODO ?!! 2117 for (int i = arrayLevel + 1; i < nodesInPath.size() - 1; i++) { 2118 o2 = startNonleafNode(o2, nodesInPath.get(i)); 2119 } 2120 writeLeafNode(o2, cldrNode, value); 2121 } 2122 2123 nodesForLastItem.clear(); 2124 nodesForLastItem.addAll(nodesInPath); 2125 } 2126 writeRbnfLeafNode( JsonElement out, CldrItem item, Map<String, String> attrAsValueMap)2127 private void writeRbnfLeafNode( 2128 JsonElement out, CldrItem item, Map<String, String> attrAsValueMap) throws IOException { 2129 if (attrAsValueMap.size() != 1) { 2130 throw new IllegalArgumentException( 2131 "Error, attributes seem wrong for RBNF " + item.getUntransformedPath()); 2132 } 2133 Entry<String, String> entry = attrAsValueMap.entrySet().iterator().next(); 2134 JsonArray arr = new JsonArray(); 2135 arr.add(entry.getKey()); 2136 arr.add(entry.getValue()); 2137 out.getAsJsonArray().add(arr); 2138 } 2139 progressPrefix( AtomicInteger readCount, int totalCount, String filename, String section)2140 private String progressPrefix( 2141 AtomicInteger readCount, int totalCount, String filename, String section) { 2142 return progressPrefix(readCount.get(), totalCount, filename, section); 2143 } 2144 progressPrefix(int readCount, int totalCount, String filename, String section)2145 private String progressPrefix(int readCount, int totalCount, String filename, String section) { 2146 return progressPrefix(readCount, totalCount) + filename + "\t" + section + "\t"; 2147 } 2148 progressPrefix(AtomicInteger readCount, int totalCount)2149 private final String progressPrefix(AtomicInteger readCount, int totalCount) { 2150 return progressPrefix(readCount.get(), totalCount); 2151 } 2152 2153 final LocalizedNumberFormatter percentFormatter = 2154 NumberFormatter.withLocale(Locale.ENGLISH) 2155 .unit(NoUnit.PERCENT) 2156 .integerWidth(IntegerWidth.zeroFillTo(3)) 2157 .precision(Precision.integer()); 2158 progressPrefix(int readCount, int totalCount)2159 private final String progressPrefix(int readCount, int totalCount) { 2160 double asPercent = ((double) readCount / (double) totalCount) * 100.0; 2161 return String.format( 2162 SECTION_ICON + " %s (step %d/%d)\t[%s]:\t", 2163 type, 2164 type.ordinal(), 2165 RunType.values().length 2166 - 1, // which 'type' are we on? (all=0, minus one to get the count right) 2167 percentFormatter.format(asPercent)); 2168 } 2169 2170 /** 2171 * Process files in a directory of CLDR file tree. 2172 * 2173 * @param dirName The directory in which xml file will be transformed. 2174 * @param minimalDraftStatus The minimumDraftStatus that will be accepted. 2175 * @throws IOException 2176 * @throws ParseException 2177 */ processDirectory(String dirName, DraftStatus minimalDraftStatus)2178 public void processDirectory(String dirName, DraftStatus minimalDraftStatus) 2179 throws IOException, ParseException { 2180 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(cldrCommonDir + "supplemental"); 2181 Factory cldrFactory = Factory.make(cldrCommonDir + dirName + "/", ".*"); 2182 Set<String> files = 2183 cldrFactory 2184 .getAvailable() 2185 // filter these out early so our work count is correct 2186 .stream() 2187 .filter( 2188 filename -> 2189 filename.matches(match) 2190 && !LdmlConvertRules.IGNORE_FILE_SET.contains( 2191 filename)) 2192 .collect(Collectors.toSet()); 2193 final int total = files.size(); 2194 AtomicInteger readCount = new AtomicInteger(0); 2195 Map<String, Throwable> errs = new TreeMap<>(); 2196 2197 // This takes a long time (minutes, in 2020), so run it in parallel forkJoinPool threads. 2198 // The result of this pipeline is an array of toString()-able filenames of XML files which 2199 // produced no JSON output, just as a warning. 2200 System.out.println( 2201 progressPrefix(0, total) 2202 + " " 2203 + MessageFormat.format( 2204 GEAR_ICON 2205 + " Beginning parallel process of {0, plural, one {# file} other {# files}}", 2206 total)); 2207 Object noOutputFiles[] = 2208 files.parallelStream() 2209 .unordered() 2210 .map( 2211 filename -> { 2212 String pathPrefix; 2213 CLDRFile file = 2214 cldrFactory.make( 2215 filename, 2216 resolve && type == RunType.main, 2217 minimalDraftStatus); 2218 // Print 'reading' after the make, to stagger the output a 2219 // little bit. 2220 // Otherwise, the printout happens before any work happens, and 2221 // is easily out of order. 2222 readCount.incrementAndGet(); 2223 logger.fine( 2224 () -> 2225 "<" 2226 + progressPrefix( 2227 readCount, total, dirName, 2228 filename) 2229 + "\r"); 2230 2231 if (type == RunType.main) { 2232 pathPrefix = 2233 "/cldr/" 2234 + dirName 2235 + "/" 2236 + unicodeLocaleToString(filename) 2237 + "/"; 2238 } else { 2239 pathPrefix = "/cldr/" + dirName + "/"; 2240 } 2241 int totalForThisFile = 0; 2242 try { 2243 totalForThisFile = 2244 convertCldrItems( 2245 readCount, 2246 total, 2247 dirName, 2248 filename, 2249 pathPrefix, 2250 mapPathsToSections( 2251 readCount, 2252 total, 2253 file, 2254 pathPrefix, 2255 sdi)); 2256 } catch (IOException | ParseException t) { 2257 t.printStackTrace(); 2258 System.err.println( 2259 "!" 2260 + progressPrefix(readCount, total) 2261 + filename 2262 + " - err - " 2263 + t); 2264 errs.put(filename, t); 2265 } finally { 2266 logger.fine( 2267 () -> 2268 "." 2269 + progressPrefix(readCount, total) 2270 + "Completing " 2271 + dirName 2272 + "/" 2273 + filename); 2274 } 2275 return new Pair<>(dirName + "/" + filename, totalForThisFile); 2276 }) 2277 .filter(p -> p.getSecond() == 0) // filter out only files which produced no 2278 // output 2279 .map(p -> p.getFirst()) 2280 .toArray(); 2281 System.out.println( 2282 progressPrefix(total, total) 2283 + " " 2284 + DONE_ICON 2285 + MessageFormat.format( 2286 "Completed parallel process of {0, plural, one {# file} other {# files}}", 2287 total)); 2288 if (noOutputFiles.length > 0) { 2289 System.err.println( 2290 WARN_ICON 2291 + MessageFormat.format( 2292 " Warning: {0, plural, one {# file} other {# files}} did not produce any output (check JSON config):", 2293 noOutputFiles.length)); 2294 for (final Object f : noOutputFiles) { 2295 final String loc = f.toString(); 2296 final String uloc = unicodeLocaleToString(f.toString()); 2297 if (skipBcp47LocalesWithSubtags 2298 && type.locales() 2299 && HAS_SUBTAG.matcher(uloc).matches()) { 2300 System.err.println( 2301 "\t- " + loc + " ❎ (Skipped due to '-T true': " + uloc + ")"); 2302 } else { 2303 System.err.println("\t- " + loc); 2304 } 2305 } 2306 } 2307 2308 if (!errs.isEmpty()) { 2309 System.err.println("Errors in these files:"); 2310 for (Map.Entry<String, Throwable> e : errs.entrySet()) { 2311 System.err.println(e.getKey() + " - " + e.getValue()); 2312 } 2313 // rethrow 2314 for (Map.Entry<String, Throwable> e : errs.entrySet()) { 2315 if (e.getValue() instanceof IOException) { 2316 throw (IOException) e.getValue(); // throw the first one 2317 } else if (e.getValue() instanceof ParseException) { 2318 throw (ParseException) e.getValue(); // throw the first one 2319 } else { 2320 throw new RuntimeException("Other exception thrown: " + e.getValue()); 2321 } 2322 /* NOTREACHED */ 2323 } 2324 } 2325 2326 if (writePackages) { 2327 for (String currentPackage : packages) { 2328 writePackagingFiles(outputDir, currentPackage); 2329 } 2330 if (type == RunType.main) { 2331 writeDefaultContent(outputDir); 2332 writeAvailableLocales(outputDir); 2333 writeCoverageLevels(outputDir); 2334 } else if (type == RunType.supplemental) { 2335 writeScriptMetadata(outputDir); 2336 if (Boolean.parseBoolean(options.get("packagelist").getValue())) { 2337 writePackageList(outputDir); 2338 } 2339 } else if (type == RunType.transforms) { 2340 writeTransformMetadata(outputDir); 2341 } 2342 } 2343 } 2344 2345 /** Replacement pattern for escaping. */ 2346 private static final Pattern escapePattern = PatternCache.get("\\\\(?!u)"); 2347 2348 /** 2349 * Escape \ in value string. \ should be replaced by \\, except in case of \u1234 In following 2350 * code, \\\\ represent one \, because java compiler and regular expression compiler each do one 2351 * round of escape. 2352 * 2353 * @param value Input string. 2354 * @return escaped string. 2355 */ escapeValue(String value)2356 private String escapeValue(String value) { 2357 Matcher match = escapePattern.matcher(value); 2358 String ret = match.replaceAll("\\\\"); 2359 return ret.replace("\n", " ").replace("\t", " "); 2360 } 2361 2362 /** 2363 * Write the value to output. 2364 * 2365 * @param out The ArrayList to hold all output lines. 2366 * @param node The CldrNode being written. 2367 * @param value The value part for this element. 2368 * @param level Indent level. 2369 * @throws IOException 2370 */ writeLeafNode(JsonElement out, CldrNode node, String value)2371 private void writeLeafNode(JsonElement out, CldrNode node, String value) throws IOException { 2372 2373 String objName = node.getNodeKeyName(); 2374 Map<String, String> attrAsValueMaps = node.getAttrAsValueMap(); 2375 writeLeafNode(out, objName, attrAsValueMaps, value, node.getName(), node.getParent(), node); 2376 } 2377 2378 /** 2379 * Write the value to output. 2380 * 2381 * @param out The ArrayList to hold all output lines. 2382 * @param objName The node's node. 2383 * @param attrAsValueMap Those attributes that will be treated as values. 2384 * @param value The value part for this element. 2385 * @param level Indent level. 2386 * @param nodeName the original nodeName (not distinguished) 2387 * @throws IOException 2388 */ writeLeafNode( JsonElement out, String objName, Map<String, String> attrAsValueMap, String value, final String nodeName, String parent, CldrNode node)2389 private void writeLeafNode( 2390 JsonElement out, 2391 String objName, 2392 Map<String, String> attrAsValueMap, 2393 String value, 2394 final String nodeName, 2395 String parent, 2396 CldrNode node) 2397 throws IOException { 2398 if (objName == null) { 2399 return; 2400 } 2401 value = escapeValue(value); 2402 2403 final boolean valueIsSpacesepArray = 2404 LdmlConvertRules.valueIsSpacesepArray(nodeName, parent); 2405 if (attrAsValueMap.isEmpty()) { 2406 // out.name(objName); 2407 if (value.isEmpty()) { 2408 if (valueIsSpacesepArray) { 2409 // empty value, output as empty space-sep array: [] 2410 out.getAsJsonObject().add(objName, new JsonArray()); 2411 } else { 2412 // empty value. 2413 if (objName.endsWith("SpaceReplacement")) { // foreignSpaceReplacement or 2414 // nativeSpaceReplacement 2415 out.getAsJsonObject().addProperty(objName, ""); 2416 } else { 2417 out.getAsJsonObject().add(objName, new JsonObject()); 2418 } 2419 } 2420 } else if (type == RunType.annotations || type == RunType.annotationsDerived) { 2421 JsonArray a = new JsonArray(); 2422 // split this, so "a | b | c" becomes ["a","b","c"] 2423 for (final String s : Annotations.splitter.split(value.trim())) { 2424 a.add(s); 2425 } 2426 out.getAsJsonObject().add(objName, a); 2427 } else if (valueIsSpacesepArray) { 2428 outputSpaceSepArray(out, objName, value); 2429 } else { 2430 // normal value 2431 out.getAsJsonObject().addProperty(objName, value); 2432 } 2433 return; 2434 } 2435 2436 // If there is no value, but a attribute being treated as value, 2437 // simplify the output. 2438 if (value.isEmpty() && attrAsValueMap.containsKey(LdmlConvertRules.ANONYMOUS_KEY)) { 2439 String v = attrAsValueMap.get(LdmlConvertRules.ANONYMOUS_KEY); 2440 // out.name(objName); 2441 if (valueIsSpacesepArray) { 2442 outputSpaceSepArray(out, objName, v); 2443 } else { 2444 out.getAsJsonObject().addProperty(objName, v); 2445 } 2446 return; 2447 } 2448 2449 JsonObject o = new JsonObject(); 2450 out.getAsJsonObject().add(objName, o); 2451 2452 if (!value.isEmpty()) { 2453 o.addProperty("_value", value); 2454 } 2455 2456 for (final String key : attrAsValueMap.keySet()) { 2457 String rawAttrValue = attrAsValueMap.get(key); 2458 String attrValue = escapeValue(rawAttrValue); 2459 // attribute is prefixed with "_" when being used as key. 2460 String attrAsKey = "_" + key; 2461 if (node != null) { 2462 logger.finest(() -> "Leaf Node: " + node.getUntransformedPath() + " ." + key); 2463 } 2464 if (LdmlConvertRules.ATTRVALUE_AS_ARRAY_SET.contains(key)) { 2465 String[] strings = attrValue.trim().split("\\s+"); 2466 JsonArray a = new JsonArray(); 2467 o.add(attrAsKey, a); 2468 for (String s : strings) { 2469 a.add(s); 2470 } 2471 } else if (node != null 2472 && LdmlConvertRules.attrIsBooleanOmitFalse( 2473 node.getUntransformedPath(), nodeName, parent, key)) { 2474 final Boolean v = Boolean.parseBoolean(rawAttrValue); 2475 if (v) { 2476 o.addProperty(attrAsKey, v); 2477 } // else: omit falsy value 2478 } else { 2479 o.addProperty(attrAsKey, attrValue); 2480 } 2481 } 2482 } 2483 outputSpaceSepArray(JsonElement out, String objName, String v)2484 private void outputSpaceSepArray(JsonElement out, String objName, String v) throws IOException { 2485 JsonArray a = new JsonArray(); 2486 out.getAsJsonObject().add(objName, a); 2487 // split this, so "a b c" becomes ["a","b","c"] 2488 for (final String s : v.trim().split(" ")) { 2489 if (!s.isEmpty()) { 2490 a.add(s); 2491 } 2492 } 2493 } 2494 } 2495