• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.json;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.Lists;
5 import com.google.gson.Gson;
6 import com.google.gson.GsonBuilder;
7 import com.google.gson.JsonArray;
8 import com.google.gson.JsonElement;
9 import com.google.gson.JsonObject;
10 import com.google.gson.JsonPrimitive;
11 import com.ibm.icu.number.IntegerWidth;
12 import com.ibm.icu.number.LocalizedNumberFormatter;
13 import com.ibm.icu.number.NumberFormatter;
14 import com.ibm.icu.number.Precision;
15 import com.ibm.icu.text.MessageFormat;
16 import com.ibm.icu.util.NoUnit;
17 import com.ibm.icu.util.ULocale;
18 import java.io.BufferedReader;
19 import java.io.File;
20 import java.io.IOException;
21 import java.io.PrintWriter;
22 import java.text.ParseException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collections;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.LinkedList;
29 import java.util.List;
30 import java.util.Locale;
31 import java.util.Map;
32 import java.util.Map.Entry;
33 import java.util.Optional;
34 import java.util.Set;
35 import java.util.TreeMap;
36 import java.util.TreeSet;
37 import java.util.concurrent.atomic.AtomicInteger;
38 import java.util.logging.Logger;
39 import java.util.regex.Matcher;
40 import java.util.regex.Pattern;
41 import java.util.stream.Collectors;
42 import org.unicode.cldr.draft.FileUtilities;
43 import org.unicode.cldr.draft.ScriptMetadata;
44 import org.unicode.cldr.draft.ScriptMetadata.Info;
45 import org.unicode.cldr.tool.Option.Options;
46 import org.unicode.cldr.util.Annotations;
47 import org.unicode.cldr.util.CLDRConfig;
48 import org.unicode.cldr.util.CLDRFile;
49 import org.unicode.cldr.util.CLDRFile.DraftStatus;
50 import org.unicode.cldr.util.CLDRLocale;
51 import org.unicode.cldr.util.CLDRPaths;
52 import org.unicode.cldr.util.CLDRTool;
53 import org.unicode.cldr.util.CLDRTransforms;
54 import org.unicode.cldr.util.CLDRURLS;
55 import org.unicode.cldr.util.CalculatedCoverageLevels;
56 import org.unicode.cldr.util.CldrUtility;
57 import org.unicode.cldr.util.CoverageInfo;
58 import org.unicode.cldr.util.DtdData;
59 import org.unicode.cldr.util.DtdType;
60 import org.unicode.cldr.util.Factory;
61 import org.unicode.cldr.util.FileCopier;
62 import org.unicode.cldr.util.GlossonymConstructor;
63 import org.unicode.cldr.util.Level;
64 import org.unicode.cldr.util.LocaleIDParser;
65 import org.unicode.cldr.util.Pair;
66 import org.unicode.cldr.util.PatternCache;
67 import org.unicode.cldr.util.StandardCodes;
68 import org.unicode.cldr.util.SupplementalDataInfo;
69 import org.unicode.cldr.util.Timer;
70 import org.unicode.cldr.util.XMLSource;
71 import org.unicode.cldr.util.XPathParts;
72 
73 /**
74  * Utility methods to extract data from CLDR repository and export it in JSON format.
75  *
76  * @author shanjian / emmons
77  */
78 @CLDRTool(alias = "ldml2json", description = "Convert CLDR data to JSON")
79 public class Ldml2JsonConverter {
80     // Icons
81     private static final String DONE_ICON = "✅";
82     private static final String GEAR_ICON = "⚙️";
83     private static final String NONE_ICON = "∅";
84     private static final String PACKAGE_ICON = "��";
85     private static final String SECTION_ICON = "��";
86     private static final String TYPE_ICON = "��";
87     private static final String WARN_ICON = "⚠️";
88 
    // Package-name prefix, tier suffixes, and the raw transform file extension
    private static final String CLDR_PKG_PREFIX = "cldr-";
    private static final String FULL_TIER_SUFFIX = "-full";
    private static final String MODERN_TIER_SUFFIX = "-modern";
    private static final String TRANSFORM_RAW_SUFFIX = ".txt";
    // Logger named after this class; used for fine-grained path tracing and warnings
    private static Logger logger = Logger.getLogger(Ldml2JsonConverter.class.getName());
95 
96     enum RunType {
97         all, // number zero
98         main,
99         supplemental(false, false), // aka 'cldr-core'
100         segments,
101         rbnf(false, true),
102         annotations,
103         annotationsDerived,
104         bcp47(false, false),
105         transforms(false, false);
106 
107         private final boolean isTiered;
108         private final boolean hasLocales;
109 
RunType()110         RunType() {
111             this.isTiered = true;
112             this.hasLocales = true;
113         }
114 
RunType(boolean isTiered, boolean hasLocales)115         RunType(boolean isTiered, boolean hasLocales) {
116             this.isTiered = isTiered;
117             this.hasLocales = hasLocales;
118         }
119         /**
120          * Is it split into modern/full?
121          *
122          * @return
123          */
tiered()124         public boolean tiered() {
125             return isTiered;
126         }
127         /**
128          * Does it have locale IDs?
129          *
130          * @return
131          */
locales()132         public boolean locales() {
133             return hasLocales;
134         }
135         /**
136          * return the options as a pipe-delimited list
137          *
138          * @return
139          */
valueList()140         public static String valueList() {
141             return String.join(
142                     "|",
143                     Lists.newArrayList(RunType.values()).stream()
144                             .map(t -> t.name())
145                             .toArray(String[]::new));
146         }
147     }
148 
    // Shared StandardCodes instance
    private static final StandardCodes sc = StandardCodes.make();
    // Default-content locale IDs, as reported by SupplementalDataInfo
    private Set<String> defaultContentLocales =
            SupplementalDataInfo.getInstance().getDefaultContentLocales();
    // Sorted set, initially empty; presumably filled with default-content locales that were
    // skipped during processing (the code that fills it is not in this part of the file)
    private Set<String> skippedDefaultContentLocales = new TreeSet<>();
153 
154     private class AvailableLocales {
155         Set<String> modern = new TreeSet<>();
156         Set<String> full = new TreeSet<>();
157     }
158 
    // Modern/full sets for this converter instance; both start empty
    private AvailableLocales avl = new AvailableLocales();
    // JSON writer configured for pretty printing, with HTML escaping turned off
    private Gson gson = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
161     private static final Options options =
162             new Options(
163                             "Usage: LDML2JsonConverter [OPTIONS] [FILES]\n"
164                                     + "This program converts CLDR data to the JSON format.\n"
165                                     + "Please refer to the following options. \n"
166                                     + "\texample: org.unicode.cldr.json.Ldml2JsonConverter -c xxx -d yyy")
167                     .add(
168                             "bcp47",
169                             'B',
170                             "(true|false)",
171                             "true",
172                             "Whether to strictly use BCP47 tags in filenames and data. Defaults to true.")
173                     .add(
174                             "bcp47-no-subtags",
175                             'T',
176                             "(true|false)",
177                             "true",
178                             "In BCP47 mode, ignore locales with subtags such as en-US-u-va-posix. Defaults to true.")
179                     .add(
180                             "commondir",
181                             'c',
182                             ".*",
183                             CLDRPaths.COMMON_DIRECTORY,
184                             "Common directory for CLDR files, defaults to CldrUtility.COMMON_DIRECTORY")
185                     .add(
186                             "destdir",
187                             'd',
188                             ".*",
189                             CLDRPaths.GEN_DIRECTORY,
190                             "Destination directory for output files, defaults to CldrUtility.GEN_DIRECTORY")
191                     .add(
192                             "match",
193                             'm',
194                             ".*",
195                             ".*",
196                             "Regular expression to define only specific locales or files to be generated")
197                     .add(
198                             "type",
199                             't',
200                             "(" + RunType.valueList() + ")",
201                             "all",
202                             "Type of CLDR data being generated, such as main, supplemental, or segments. All gets all.")
203                     .add(
204                             "resolved",
205                             'r',
206                             "(true|false)",
207                             "false",
208                             "Whether the output JSON for the main directory should be based on resolved or unresolved data")
209                     .add(
210                             "Redundant",
211                             'R',
212                             "(true|false)",
213                             "false",
214                             "Include redundant data from code-fallback and constructed")
215                     .add(
216                             "draftstatus",
217                             's',
218                             "(approved|contributed|provisional|unconfirmed)",
219                             "unconfirmed",
220                             "The minimum draft status of the output data")
221                     .add(
222                             "coverage",
223                             'l',
224                             "(minimal|basic|moderate|modern|comprehensive|optional)",
225                             "optional",
226                             "The maximum coverage level of the output data")
227                     .add(
228                             "packagelist",
229                             'P',
230                             "(true|false)",
231                             "true",
232                             "Whether to output PACKAGES.md and cldr-core/cldr-packages.json (during supplemental/cldr-core)")
233                     .add(
234                             "fullnumbers",
235                             'n',
236                             "(true|false)",
237                             "false",
238                             "Whether the output JSON should output data for all numbering systems, even those not used in the locale")
239                     .add(
240                             "other",
241                             'o',
242                             "(true|false)",
243                             "false",
244                             "Whether to write out the 'other' section, which contains any unmatched paths")
245                     .add(
246                             "packages",
247                             'p',
248                             "(true|false)",
249                             "false",
250                             "Whether to group data files into installable packages")
251                     .add(
252                             "identity",
253                             'i',
254                             "(true|false)",
255                             "true",
256                             "Whether to copy the identity info into all sections containing data")
257                     .add("konfig", 'k', ".*", null, "LDML to JSON configuration file")
258                     .add(
259                             "pkgversion",
260                             'V',
261                             ".*",
262                             getDefaultVersion(),
263                             "Version to be used in writing package files")
264                     .add(
265                             "Modern",
266                             'M',
267                             "(true|false)",
268                             "false",
269                             "Whether to include the -modern tier")
270                     // Primarily useful for non-Maven build systems where CldrUtility.LICENSE may
271                     // not be available as it is put in place by pom.xml
272                     .add(
273                             "license-file",
274                             'L',
275                             ".*",
276                             "",
277                             "Override the license file included in the bundle");
278 
main(String[] args)279     public static void main(String[] args) throws Exception {
280         System.out.println(GEAR_ICON + " " + Ldml2JsonConverter.class.getName() + " options:");
281         options.parse(args, true);
282 
283         Timer overallTimer = new Timer();
284         overallTimer.start();
285         final String rawType = options.get("type").getValue();
286 
287         if (RunType.all.name().equals(rawType)) {
288             // Running all types
289             for (final RunType t : RunType.values()) {
290                 if (t == RunType.all) continue;
291                 System.out.println();
292                 System.out.println(
293                         TYPE_ICON + "#######################  " + t + " #######################");
294                 Timer subTimer = new Timer();
295                 subTimer.start();
296                 processType(t.name());
297                 System.out.println(
298                         TYPE_ICON + " " + t + "\tFinished in " + subTimer.toMeasureString());
299                 System.out.println();
300             }
301         } else {
302             processType(rawType);
303         }
304 
305         System.out.println(
306                 "\n\n###\n\n"
307                         + DONE_ICON
308                         + " Finished everything in "
309                         + overallTimer.toMeasureString());
310     }
311 
processType(final String runType)312     static void processType(final String runType) throws Exception {
313         Ldml2JsonConverter l2jc =
314                 new Ldml2JsonConverter(
315                         options.get("commondir").getValue(),
316                         options.get("destdir").getValue(),
317                         runType,
318                         Boolean.parseBoolean(options.get("fullnumbers").getValue()),
319                         Boolean.parseBoolean(options.get("resolved").getValue()),
320                         options.get("coverage").getValue(),
321                         options.get("match").getValue(),
322                         Boolean.parseBoolean(options.get("packages").getValue()),
323                         options.get("konfig").getValue(),
324                         options.get("pkgversion").getValue(),
325                         Boolean.parseBoolean(options.get("bcp47").getValue()),
326                         Boolean.parseBoolean(options.get("bcp47-no-subtags").getValue()),
327                         Boolean.parseBoolean(options.get("Modern").getValue()),
328                         Boolean.parseBoolean(options.get("Redundant").getValue()),
329                         Optional.ofNullable(options.get("license-file").getValue())
330                                 .filter(s -> !s.isEmpty()));
331 
332         DraftStatus status = DraftStatus.valueOf(options.get("draftstatus").getValue());
333         l2jc.processDirectory(runType, status);
334     }
335 
    // The CLDR file directory where those official XML files will be found.
    private String cldrCommonDir;
    // Where the generated JSON files will be stored.
    private String outputDir;
    // Whether data in main should output all numbering systems, even those not in use in the
    // locale.
    private boolean fullNumbers;
    // Whether data in main should be resolved for output.
    private boolean resolve;
    // Used to match specific locales for output
    private String match;
    // Used to filter based on coverage: paths whose coverage value exceeds this are skipped
    private int coverageValue;
    // Whether we should write output files into installable packages
    private boolean writePackages;
    // Type of run for this converter: one of the RunType constants (main, supplemental,
    // segments, ...)
    private final RunType type;
    // include Redundant data such as apc="apc", en_US="en (US)"
    private boolean includeRedundant;
355 
356     static class JSONSection implements Comparable<JSONSection> {
357         public String section;
358         public Pattern pattern;
359         public String packageName;
360 
361         @Override
compareTo(JSONSection other)362         public int compareTo(JSONSection other) {
363             return section.compareTo(other.section);
364         }
365     }
366 
    // Dependencies as reported by the config file reader
    private Map<String, String> dependencies;
    // Sections as reported by the config file reader
    private List<JSONSection> sections;
    // Sorted set of package names; starts empty
    private Set<String> packages;
    // Version string for package files; its first dot-separated component is also registered
    // with LdmlConvertRules as a version handler
    private final String pkgVersion;
    // Whether to strictly use BCP47 tags
    private final boolean strictBcp47;
    // Whether to include the -modern tier
    private final boolean writeModernPackage;
    // Optional override for the license file; empty when no override was given
    private final Optional<String> licenseFile;
    // Only ever true when strictBcp47 is also true
    private final boolean skipBcp47LocalesWithSubtags;
    // Reader for the LDML-to-JSON configuration file
    private LdmlConfigFileReader configFileReader;
376 
Ldml2JsonConverter( String cldrDir, String outputDir, String runType, boolean fullNumbers, boolean resolve, String coverage, String match, boolean writePackages, String configFile, String pkgVersion, boolean strictBcp47, boolean skipBcp47LocalesWithSubtags, boolean writeModernPackage, boolean includeRedundant, Optional<String> licenseFile)377     public Ldml2JsonConverter(
378             String cldrDir,
379             String outputDir,
380             String runType,
381             boolean fullNumbers,
382             boolean resolve,
383             String coverage,
384             String match,
385             boolean writePackages,
386             String configFile,
387             String pkgVersion,
388             boolean strictBcp47,
389             boolean skipBcp47LocalesWithSubtags,
390             boolean writeModernPackage,
391             boolean includeRedundant,
392             Optional<String> licenseFile) {
393         this.writeModernPackage = writeModernPackage;
394         this.strictBcp47 = strictBcp47;
395         this.skipBcp47LocalesWithSubtags = strictBcp47 && skipBcp47LocalesWithSubtags;
396         this.cldrCommonDir = cldrDir;
397         this.outputDir = outputDir;
398         try {
399             this.type = RunType.valueOf(runType);
400         } catch (IllegalArgumentException | NullPointerException e) {
401             throw new RuntimeException(
402                     "runType (-t) invalid: " + runType + " must be one of " + RunType.valueList(),
403                     e);
404         }
405         this.fullNumbers = fullNumbers;
406         this.resolve = resolve;
407         this.match = match;
408         this.writePackages = writePackages;
409         this.coverageValue = Level.get(coverage).getLevel();
410         this.pkgVersion = pkgVersion;
411 
412         LdmlConvertRules.addVersionHandler(pkgVersion.split("\\.")[0]);
413 
414         configFileReader = new LdmlConfigFileReader();
415         configFileReader.read(configFile, type);
416         this.dependencies = configFileReader.getDependencies();
417         this.sections = configFileReader.getSections();
418         this.packages = new TreeSet<>();
419         this.includeRedundant = includeRedundant;
420         this.licenseFile = licenseFile;
421     }
422 
    /**
     * Matches a whole path containing a {@code [@cp="X"]} attribute whose value X is exactly
     * one of the characters {@code [ ] ' " @ / =}. Group 1 is the text before the attribute,
     * group 2 is the character, and group 3 is the text after the attribute.
     *
     * @see XPathParts#addInternal
     */
    static final Pattern ANNOTATION_CP_REMAP =
            PatternCache.get("^(.*)\\[@cp=\"(\\[|\\]|'|\"|@|/|=)\"\\](.*)$");
428 
429     /**
430      * Transform the path by applying PATH_TRANSFORMATIONS rules.
431      *
432      * @param pathStr The path string being transformed.
433      * @return The transformed path.
434      */
transformPath(final String pathStr, final String pathPrefix)435     private String transformPath(final String pathStr, final String pathPrefix) {
436         String result = pathStr;
437 
438         // handle annotation cp value
439         Matcher cpm = ANNOTATION_CP_REMAP.matcher(result);
440         if (cpm.matches()) {
441             // We need to avoid breaking the syntax not just of JSON, but of XPATH.
442             final String badCodepointRange = cpm.group(2);
443             StringBuilder sb = new StringBuilder(cpm.group(1)).append("[@cp=\"");
444             // JSON would handle a wide range of things if escaped, but XPATH will not.
445             if (badCodepointRange.codePointCount(0, badCodepointRange.length()) != 1) {
446                 // forbid more than one U+ (because we will have to unescape it.)
447                 throw new IllegalArgumentException(
448                         "Need exactly one codepoint in the @cp string, but got "
449                                 + badCodepointRange
450                                 + " in xpath "
451                                 + pathStr);
452             }
453             badCodepointRange
454                     .codePoints()
455                     .forEach(cp -> sb.append("U+").append(Integer.toHexString(cp).toUpperCase()));
456             sb.append("\"]").append(cpm.group(3));
457             result = sb.toString();
458         }
459 
460         logger.finest(" IN pathStr : " + result);
461         result = LdmlConvertRules.PathTransformSpec.applyAll(result);
462         result = result.replaceFirst("/ldml/", pathPrefix);
463         result = result.replaceFirst("/supplementalData/", pathPrefix);
464 
465         if (result.startsWith("//cldr/supplemental/references/reference")) {
466             // no change
467         } else if (strictBcp47) {
468             // Look for something like <!--@MATCH:set/validity/locale--> in DTD
469             if (result.contains("localeDisplayNames/languages/language")) {
470                 if (result.contains("type=\"root\"")) {
471                     // This is strictBcp47
472                     // Drop translation for 'root' as it conflicts with 'und'
473                     return ""; // 'drop this path'
474                 }
475                 result = fixXpathBcp47(result, "language", "type");
476             } else if (result.contains("likelySubtags/likelySubtag")) {
477                 if (!result.contains("\"iw\"")
478                         && !result.contains("\"in\"")
479                         && !result.contains("\"ji\"")) {
480                     // Special case: preserve 'iw' and 'in' likely subtags
481                     result = fixXpathBcp47(result, "likelySubtag", "from", "to");
482                 } else {
483                     result = underscoreToHypen(result);
484                     logger.warning("Including aliased likelySubtags: " + result);
485                 }
486             } else if (result.startsWith("//cldr/supplemental/weekData/weekOfPreference")) {
487                 result = fixXpathBcp47(result, "weekOfPreference", "locales");
488             } else if (result.startsWith("//cldr/supplemental/metadata/defaultContent")) {
489                 result = fixXpathBcp47(result, "defaultContent", "locales");
490             } else if (result.startsWith("//cldr/supplemental/grammatical")
491                     && result.contains("Data/grammaticalFeatures")) {
492                 result = fixXpathBcp47(result, "grammaticalFeatures", "locales");
493             } else if (result.startsWith("//cldr/supplemental/grammatical")
494                     && result.contains("Data/grammaticalDerivations")) {
495                 result = fixXpathBcp47(result, "grammaticalDerivations", "locales");
496             } else if (result.startsWith("//cldr/supplemental/dayPeriodRuleSet")) {
497                 result = fixXpathBcp47(result, "dayPeriodRules", "locales");
498             } else if (result.startsWith("//cldr/supplemental/plurals")) {
499                 result = fixXpathBcp47(result, "pluralRules", "locales");
500             } else if (result.startsWith("//cldr/supplemental/timeData/hours")) {
501                 result = fixXpathBcp47MishMash(result, "hours", "regions");
502             } else if (result.startsWith("//cldr/supplemental/parentLocales/parentLocale")) {
503                 result = fixXpathBcp47(result, "parentLocale", "parent", "locales");
504             } else if (result.startsWith(
505                     "//cldr/supplemental/territoryInfo/territory/languagePopulation")) {
506                 result = fixXpathBcp47(result, "languagePopulation", "type");
507             } else if (result.contains("languages")
508                     || result.contains("languageAlias")
509                     || result.contains("languageMatches")
510                     || result.contains("likelySubtags")
511                     || result.contains("parentLocale")
512                     || result.contains("locales=")) {
513                 final String oldResult = result;
514                 result = underscoreToHypen(result);
515                 if (!oldResult.equals(result)) {
516                     logger.fine(oldResult + " => " + result);
517                 }
518             }
519         } else if (result.contains("languages")
520                 || result.contains("languageAlias")
521                 || result.contains("languageMatches")
522                 || result.contains("likelySubtags")
523                 || result.contains("parentLocale")
524                 || result.contains("locales=")) {
525             // old behavior: just munge paths..
526             result = underscoreToHypen(result);
527         }
528         logger.finest("OUT pathStr : " + result);
529         logger.finest("result: " + result);
530         return result;
531     }
532 
533     /** Read all paths in the file, and assign each to a JSONSection. Return the map. */
mapPathsToSections( AtomicInteger readCount, int totalCount, CLDRFile file, String pathPrefix, SupplementalDataInfo sdi)534     private Map<JSONSection, List<CldrItem>> mapPathsToSections(
535             AtomicInteger readCount,
536             int totalCount,
537             CLDRFile file,
538             String pathPrefix,
539             SupplementalDataInfo sdi)
540             throws IOException, ParseException {
541         final Map<JSONSection, List<CldrItem>> sectionItems = new TreeMap<>();
542 
543         String locID = file.getLocaleID();
544         Matcher noNumberingSystemMatcher = LdmlConvertRules.NO_NUMBERING_SYSTEM_PATTERN.matcher("");
545         Matcher numberingSystemMatcher = LdmlConvertRules.NUMBERING_SYSTEM_PATTERN.matcher("");
546         Matcher rootIdentityMatcher = LdmlConvertRules.ROOT_IDENTITY_PATTERN.matcher("");
547         Matcher versionMatcher = LdmlConvertRules.VERSION_PATTERN.matcher("");
548         Set<String> activeNumberingSystems = new TreeSet<>();
549         activeNumberingSystems.add("latn"); // Always include latin script numbers
550         for (String np : LdmlConvertRules.ACTIVE_NUMBERING_SYSTEM_XPATHS) {
551             String ns = file.getWinningValue(np);
552             if (ns != null && ns.length() > 0) {
553                 activeNumberingSystems.add(ns);
554             }
555         }
556         final DtdType fileDtdType = file.getDtdType();
557         CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();
558         // read paths in DTD order. The order is critical for JSON processing.
559         final CLDRFile.Status status = new CLDRFile.Status();
560         for (Iterator<String> it =
561                         file.iterator("", DtdData.getInstance(fileDtdType).getDtdComparator(null));
562                 it.hasNext(); ) {
563             int cv = Level.UNDETERMINED.getLevel();
564             final String path = it.next();
565 
566             // Check for code-fallback and constructed first, even before fullpath and value
567             final String localeWhereFound = file.getSourceLocaleID(path, status);
568             if (!includeRedundant
569                     && (localeWhereFound.equals(XMLSource.CODE_FALLBACK_ID)
570                             || // language[@type="apc"] = apc : missing
571                             status.pathWhereFound.equals(
572                                     GlossonymConstructor
573                                             .PSEUDO_PATH))) { // language[@type="fa_AF"] = fa (AF)
574                 // or Farsi (Afghanistan) : missing
575                 // Don't include these paths.
576                 continue;
577             }
578 
579             // now get the fullpath and value
580             String fullPath = file.getFullXPath(path);
581             String value = file.getWinningValue(path);
582 
583             if (fullPath == null) {
584                 fullPath = path;
585             }
586 
587             if (!CLDRFile.isSupplementalName(locID)
588                     && path.startsWith("//ldml/")
589                     && !path.contains("/identity")) {
590                 cv = covInfo.getCoverageValue(path, locID);
591             }
592             if (cv > coverageValue) {
593                 continue;
594             }
595             // Discard root identity element unless the locale is root
596             // TODO: CLDR-17790 this code should not be needed.
597             rootIdentityMatcher.reset(fullPath);
598             if (rootIdentityMatcher.matches() && !"root".equals(locID)) {
599                 continue;
600             }
601 
602             // discard version stuff
603             versionMatcher.reset(fullPath);
604             if (versionMatcher.matches()) {
605                 // drop //ldml/identity/version entirely.
606                 continue;
607             }
608 
609             // automatically filter out number symbols and formats without a numbering system
610             noNumberingSystemMatcher.reset(fullPath);
611             if (noNumberingSystemMatcher.matches()) {
612                 continue;
613             }
614 
615             // Filter out non-active numbering systems data unless fullNumbers is specified.
616             numberingSystemMatcher.reset(fullPath);
617             if (numberingSystemMatcher.matches() && !fullNumbers) {
618                 XPathParts xpp = XPathParts.getFrozenInstance(fullPath);
619                 String currentNS = xpp.getAttributeValue(2, "numberSystem");
620                 if (currentNS != null && !activeNumberingSystems.contains(currentNS)) {
621                     continue;
622                 }
623             }
624 
625             // Handle the no inheritance marker.
626             if (resolve && CldrUtility.NO_INHERITANCE_MARKER.equals(value)) {
627                 continue;
628             }
629 
630             // discard draft before transforming
631             final String pathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(path).replaceAll("");
632             final String fullPathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(fullPath).replaceAll("");
633 
634             final String pathNoXmlSpace =
635                     CLDRFile.XML_SPACE_PATTERN.matcher(pathNoDraft).replaceAll("");
636             final String fullPathNoXmlSpace =
637                     CLDRFile.XML_SPACE_PATTERN.matcher(fullPathNoDraft).replaceAll("");
638 
639             final String transformedPath = transformPath(pathNoXmlSpace, pathPrefix);
640             final String transformedFullPath = transformPath(fullPathNoXmlSpace, pathPrefix);
641 
642             if (transformedPath.isEmpty()) {
643                 continue; // skip this path
644             }
645 
646             for (JSONSection js :
647                     sections) { // TODO: move to subfunction, error if >1 section matches
648                 if (js.pattern.matcher(transformedPath).matches()) {
649                     CldrItem item =
650                             new CldrItem(
651                                     transformedPath, transformedFullPath, path, fullPath, value);
652 
653                     List<CldrItem> cldrItems = sectionItems.get(js);
654                     if (cldrItems == null) {
655                         cldrItems = new ArrayList<>();
656                     }
657                     cldrItems.add(item);
658                     sectionItems.put(js, cldrItems);
659                     break;
660                 }
661             }
662         }
663 
664         // TODO: move matcher out of inner loop
665         final Matcher versionInfoMatcher = VERSION_INFO_PATTERN.matcher("");
666         // Automatically copy the version info to any sections that had real data in them.
667         JSONSection otherSection = sections.get(sections.size() - 1);
668         List<CldrItem> others = sectionItems.get(otherSection);
669         if (others == null) {
670             return sectionItems;
671         }
672         List<CldrItem> otherSectionItems = new ArrayList<>(others);
673         int addedItemCount = 0;
674         boolean copyIdentityInfo = Boolean.parseBoolean(options.get("identity").getValue());
675 
676         for (CldrItem item : otherSectionItems) {
677             String thisPath = item.getPath();
678             versionInfoMatcher.reset(thisPath);
679             if (versionInfoMatcher.matches()) {
680                 for (JSONSection js : sections) {
681                     if (sectionItems.get(js) != null
682                             && !js.section.equals("other")
683                             && copyIdentityInfo) {
684                         List<CldrItem> hit = sectionItems.get(js);
685                         hit.add(addedItemCount, item);
686                         sectionItems.put(js, hit);
687                     }
688                     if (js.section.equals("other")) { // did not match one of the regular sections
689                         List<CldrItem> hit = sectionItems.get(js);
690                         hit.remove(item);
691                         sectionItems.put(js, hit);
692                     }
693                 }
694                 addedItemCount++;
695             }
696         }
697         return sectionItems;
698     }
699 
700     static final Pattern VERSION_INFO_PATTERN = PatternCache.get(".*/(identity|version).*");
701     static final Pattern HAS_SUBTAG = PatternCache.get(".*-[a-z]-.*");
702 
703     /**
704      * Convert CLDR's XML data to JSON format.
705      *
706      * @param file CLDRFile object.
707      * @param outFilename The file name used to save JSON data.
708      * @throws IOException
709      * @throws ParseException
710      * @return total items written in all files. (if 0, file had no effect)
711      */
convertCldrItems( AtomicInteger readCount, int totalCount, String dirName, String filename, String pathPrefix, final Map<JSONSection, List<CldrItem>> sectionItems)712     private int convertCldrItems(
713             AtomicInteger readCount,
714             int totalCount,
715             String dirName,
716             String filename,
717             String pathPrefix,
718             final Map<JSONSection, List<CldrItem>> sectionItems)
719             throws IOException, ParseException {
720         // zone and timezone items are queued for sorting first before they are
721         // processed.
722 
723         final String filenameAsLangTag = unicodeLocaleToString(filename);
724 
725         if (skipBcp47LocalesWithSubtags
726                 && type.locales()
727                 && HAS_SUBTAG.matcher(filenameAsLangTag).matches()) {
728             // Has a subtag, so skip it.
729             // It will show up in the "no output" list.
730             return 0;
731         }
732 
733         int totalItemsInFile = 0;
734 
735         List<Pair<String, Integer>> outputProgress = new LinkedList<>();
736 
737         for (JSONSection js : sections) {
738             if (js.section.equals("IGNORE")) {
739                 continue;
740             }
741             String outFilename;
742             if (type == RunType.rbnf) {
743                 outFilename = filenameAsLangTag + ".json";
744             } else if (type == RunType.bcp47) {
745                 outFilename = filename + ".json";
746             } else if (type == RunType.transforms) {
747                 outFilename = filename + ".json";
748             } else if (js.section.equals("other")) {
749                 // If you see other-___.json, it means items that were missing from
750                 // JSON_config_*.txt
751                 outFilename = js.section + "-" + filename + ".json"; // Use original filename
752             } else {
753                 outFilename = js.section + ".json";
754             }
755             String tier = "";
756             boolean writeOther = Boolean.parseBoolean(options.get("other").getValue());
757             if (js.section.equals("other") && !writeOther) {
758                 continue;
759             } else {
760                 StringBuilder outputDirname = new StringBuilder(outputDir);
761                 if (writePackages) {
762                     if (type.tiered()) {
763                         LocaleIDParser lp = new LocaleIDParser();
764                         lp.set(filename);
765                         if (defaultContentLocales.contains(filename)
766                                 && lp.getRegion().length() > 0) {
767                             if (type == RunType.main) {
768                                 skippedDefaultContentLocales.add(filenameAsLangTag);
769                             }
770                             continue;
771                         }
772                         final boolean isModernTier = localeIsModernTier(filename);
773                         if (isModernTier && writeModernPackage) {
774                             tier = MODERN_TIER_SUFFIX;
775                             if (type == RunType.main) {
776                                 avl.modern.add(filenameAsLangTag);
777                             }
778                         } else {
779                             tier = FULL_TIER_SUFFIX;
780                         }
781                         if (type == RunType.main) {
782                             avl.full.add(filenameAsLangTag);
783                         }
784                     } else if (type == RunType.rbnf
785                             || type == RunType.bcp47
786                             || type == RunType.transforms) {
787                         // untiered, just use the name
788                         js.packageName = type.name();
789                         tier = "";
790                     }
791                     if (js.packageName != null) {
792                         String packageName = CLDR_PKG_PREFIX + js.packageName + tier;
793                         outputDirname.append("/" + packageName);
794                         packages.add(packageName);
795                     }
796                     outputDirname.append("/" + dirName + "/");
797                     if (type.tiered()) {
798                         outputDirname.append(filenameAsLangTag);
799                     }
800                     logger.fine("outDir: " + outputDirname);
801                     logger.fine("pack: " + js.packageName);
802                     logger.fine("dir: " + dirName);
803                 } else {
804                     outputDirname.append("/" + filename);
805                 }
806 
807                 assert (tier.isEmpty() == !type.tiered());
808 
809                 List<String> outputDirs = new ArrayList<>();
810                 outputDirs.add(outputDirname.toString());
811                 if (writePackages && tier.equals(MODERN_TIER_SUFFIX) && js.packageName != null) {
812                     // if it is in 'modern', add it to 'full' and core also.
813                     outputDirs.add(
814                             outputDirname
815                                     .toString()
816                                     .replaceFirst(MODERN_TIER_SUFFIX, FULL_TIER_SUFFIX));
817                     // Also need to make sure that the full and core package is added
818                     packages.add(CLDR_PKG_PREFIX + js.packageName + FULL_TIER_SUFFIX);
819                 }
820 
821                 for (String outputDir : outputDirs) {
822                     List<CldrItem> theItems = sectionItems.get(js);
823                     if (theItems == null || theItems.size() == 0) {
824                         logger.fine(
825                                 () ->
826                                         ">"
827                                                 + progressPrefix(readCount, totalCount)
828                                                 + outputDir
829                                                 + " - no items to write in "
830                                                 + js.section); // mostly noise
831                         continue;
832                     }
833                     logger.fine(
834                             () ->
835                                     ("?"
836                                             + progressPrefix(
837                                                     readCount, totalCount, filename, js.section)
838                                             + " - "
839                                             + theItems.size()
840                                             + " item(s)"
841                                             + "\r"));
842                     // Create the output dir if it doesn't exist
843                     File dir = new File(outputDir.toString());
844                     if (!dir.exists()) {
845                         dir.mkdirs();
846                     }
847                     JsonObject out = new JsonObject(); // root object for writing
848 
849                     ArrayList<CldrItem> sortingItems = new ArrayList<>();
850                     ArrayList<CldrItem> arrayItems = new ArrayList<>();
851 
852                     ArrayList<CldrNode> nodesForLastItem = new ArrayList<>();
853                     String lastLeadingArrayItemPath = null;
854                     String leadingArrayItemPath = "";
855                     int valueCount = 0;
856                     String previousIdentityPath = null;
857                     for (CldrItem item : theItems) {
858                         if (item.getPath().isEmpty()) {
859                             throw new IllegalArgumentException(
860                                     "empty xpath in "
861                                             + filename
862                                             + " section "
863                                             + js.packageName
864                                             + "/"
865                                             + js.section);
866                         }
867                         if (type == RunType.rbnf) {
868                             item.adjustRbnfPath();
869                         }
870 
871                         // items in the identity section of a file should only ever contain the
872                         // lowest level, even if using
873                         // resolving source, so if we have duplicates ( caused by attributes used as
874                         // a value ) then suppress
875                         // them here.
876                         if (item.getPath().contains("/identity/")) {
877                             String[] parts = item.getPath().split("\\[");
878                             if (parts[0].equals(previousIdentityPath)) {
879                                 continue;
880                             } else {
881                                 XPathParts xpp = XPathParts.getFrozenInstance(item.getPath());
882                                 String territory = xpp.findAttributeValue("territory", "type");
883                                 LocaleIDParser lp = new LocaleIDParser().set(filename);
884                                 if (territory != null
885                                         && territory.length() > 0
886                                         && !territory.equals(lp.getRegion())) {
887                                     continue;
888                                 }
889                                 previousIdentityPath = parts[0];
890                             }
891                         }
892 
893                         if (item.getUntransformedPath()
894                                 .startsWith("//supplementalData/transforms")) {
895                             // here, write the raw data
896                             final String rawTransformFile = filename + TRANSFORM_RAW_SUFFIX;
897                             try (PrintWriter outf =
898                                     FileUtilities.openUTF8Writer(outputDir, rawTransformFile)) {
899                                 outf.println(item.getValue().trim());
900                                 // note: not logging the write here- it will be logged when the
901                                 // .json file is written.
902                             }
903                             final String path = item.getPath();
904                             item.setPath(fixTransformPath(path));
905                             final String fullPath = item.getFullPath();
906                             item.setFullPath(fixTransformPath(fullPath));
907                             // the value is now the raw filename
908                             item.setValue(rawTransformFile);
909                         }
910 
911                         // some items need to be split to multiple item before processing. None
912                         // of those items need to be sorted.
913                         // Applies to SPLITTABLE_ATTRS attributes.
914                         CldrItem[] items = item.split();
915                         if (items == null) {
916                             // Nothing to split. Make it a 1-element array.
917                             items = new CldrItem[1];
918                             items[0] = item;
919                         }
920                         valueCount += items.length;
921 
922                         // Hard code this part.
923                         if (item.getUntransformedPath().contains("unitPreference")) {
924                             // Need to do more transforms on this one, so just output version/etc
925                             // here.
926                             continue;
927                         }
928 
929                         for (CldrItem newItem : items) {
930                             // alias will be dropped in conversion, don't count it.
931                             if (newItem.isAliasItem()) {
932                                 valueCount--;
933                             }
934 
935                             // Items like zone items need to be sorted first before write them out.
936                             if (newItem.needsSort()) {
937                                 resolveArrayItems(out, nodesForLastItem, arrayItems);
938                                 sortingItems.add(newItem);
939                             } else {
940                                 Matcher matcher =
941                                         LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(
942                                                 newItem.getPath());
943                                 if (matcher.matches()) {
944                                     resolveSortingItems(out, nodesForLastItem, sortingItems);
945                                     leadingArrayItemPath = matcher.group(1);
946                                     if (lastLeadingArrayItemPath != null
947                                             && !lastLeadingArrayItemPath.equals(
948                                                     leadingArrayItemPath)) {
949                                         resolveArrayItems(out, nodesForLastItem, arrayItems);
950                                     }
951                                     lastLeadingArrayItemPath = leadingArrayItemPath;
952                                     arrayItems.add(newItem);
953                                 } else {
954                                     // output a single item
955                                     resolveSortingItems(out, nodesForLastItem, sortingItems);
956                                     resolveArrayItems(out, nodesForLastItem, arrayItems);
957                                     outputCldrItem(out, nodesForLastItem, newItem);
958                                     lastLeadingArrayItemPath = "";
959                                 }
960                             }
961                         }
962                     }
963 
964                     resolveSortingItems(out, nodesForLastItem, sortingItems);
965                     resolveArrayItems(out, nodesForLastItem, arrayItems);
966                     if (js.section.contains("unitPreferenceData")) {
967                         outputUnitPreferenceData(js, theItems, out, nodesForLastItem);
968                     }
969 
970                     // Special processing for transforms.
971                     if (type == RunType.transforms) {
972                         final JsonObject jo = out.getAsJsonObject("transforms");
973                         if (jo == null || jo.isEmpty()) {
974                             throw new RuntimeException(
975                                     "Could not get transforms object in " + filename);
976                         }
977                         @SuppressWarnings("unchecked")
978                         final Entry<String, JsonElement>[] s = jo.entrySet().toArray(new Entry[0]);
979                         if (s == null || s.length != 1) {
980                             throw new RuntimeException(
981                                     "Could not get 1 subelement of transforms in " + filename);
982                         }
983                         // key doesn't matter.
984                         // move subitem up
985                         out = s[0].getValue().getAsJsonObject();
986                         final Entry<String, JsonElement>[] s2 =
987                                 out.entrySet().toArray(new Entry[0]);
988                         if (s2 == null || s2.length != 1) {
989                             throw new RuntimeException(
990                                     "Could not get 1 sub-subelement of transforms in " + filename);
991                         }
992                         // move sub-subitem up.
993                         out = s2[0].getValue().getAsJsonObject();
994                     }
995 
996                     // write JSON
997                     try (PrintWriter outf = FileUtilities.openUTF8Writer(outputDir, outFilename)) {
998                         outf.println(gson.toJson(out));
999                     }
1000 
1001                     String outPath =
1002                             new File(outputDir.substring(this.outputDir.length()), outFilename)
1003                                     .getPath();
1004                     outputProgress.add(
1005                             Pair.of(String.format("%20s %s", js.section, outPath), valueCount));
1006                     logger.fine(
1007                             ">"
1008                                     + progressPrefix(readCount, totalCount, filename, js.section)
1009                                     + String.format("…%s (%d values)", outPath, valueCount));
1010 
1011                     totalItemsInFile += valueCount;
1012                 }
1013             }
1014         } // this is the only normal output with debug off
1015         StringBuilder outStr = new StringBuilder();
1016         if (!outputProgress.isEmpty()) {
1017             // Put these first, so the percent is at the end.
1018             for (final Pair<String, Integer> outputItem : outputProgress) {
1019                 outStr.append(
1020                         String.format("\t%6d %s\n", outputItem.getSecond(), outputItem.getFirst()));
1021             }
1022             outStr.append(
1023                     String.format(
1024                             "%s%-12s\t  %s\n",
1025                             progressPrefix(readCount, totalCount),
1026                             filename,
1027                             valueSectionsFormat(totalItemsInFile, outputProgress.size())));
1028         } else {
1029             outStr.append(
1030                     String.format(
1031                             "%s%-12s\t" + NONE_ICON + " (no output)\n",
1032                             progressPrefix(readCount, totalCount),
1033                             filename));
1034         }
1035         synchronized (readCount) { // to prevent interleaved output
1036             System.out.print(outStr);
1037         }
1038         return totalItemsInFile;
1039     }
1040 
1041     /**
1042      * Fixup an XPathParts with a specific transform element
1043      *
1044      * @param xpp the XPathParts to modify
1045      * @param attribute the attribute name, such as "alias"
1046      */
fixTransformPath(final XPathParts xpp, final String attribute)1047     private static final void fixTransformPath(final XPathParts xpp, final String attribute) {
1048         final String v = xpp.getAttributeValue(-2, attribute); // on penultimate element
1049         if (v == null) return;
1050         final Set<String> aliases = new HashSet<>();
1051         final Set<String> bcpAliases = new HashSet<>();
1052         for (final String s : v.split(" ")) {
1053             final String q = Locale.forLanguageTag(s).toLanguageTag();
1054             if (s.equals(q)) {
1055                 // bcp47 round trips- add to bcp list
1056                 bcpAliases.add(s);
1057             } else {
1058                 // different - add to other aliases.
1059                 aliases.add(s);
1060             }
1061         }
1062         if (aliases.isEmpty()) {
1063             xpp.removeAttribute(-2, attribute);
1064         } else {
1065             xpp.setAttribute(-2, attribute, String.join(" ", aliases.toArray(new String[0])));
1066         }
1067         if (bcpAliases.isEmpty()) {
1068             xpp.removeAttribute(-2, attribute + "Bcp47");
1069         } else {
1070             xpp.setAttribute(
1071                     -2, attribute + "Bcp47", String.join(" ", bcpAliases.toArray(new String[0])));
1072         }
1073     }
1074 
1075     /**
1076      * Fixup a transform path, expanding the alias and backwardAlias into bcp47 and non-bcp47
1077      * attributes.
1078      */
fixTransformPath(final String path)1079     private static final String fixTransformPath(final String path) {
1080         final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed();
1081         fixTransformPath(xpp, "alias");
1082         fixTransformPath(xpp, "backwardAlias");
1083         return xpp.toString();
1084     }
1085 
valueSectionsFormat(int values, int sections)1086     private static String valueSectionsFormat(int values, int sections) {
1087         return MessageFormat.format(
1088                 "({0, plural,  one {# value} other {# values}} in {1, plural, one {# section} other {# sections}})",
1089                 values,
1090                 sections);
1091     }
1092 
localeIsModernTier(String filename)1093     private boolean localeIsModernTier(String filename) {
1094         Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename);
1095         if (lev == null) return false;
1096         return lev.isAtLeast(Level.MODERN);
1097     }
1098 
localeIsBasicTier(String filename)1099     private boolean localeIsBasicTier(String filename) {
1100         Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename);
1101         if (lev == null) return false;
1102         return lev.isAtLeast(Level.BASIC);
1103     }
1104 
1105     /**
1106      * Entire xpaths and random short strings are passed through this function. Not really Locale ID
1107      * to Language Tag.
1108      *
1109      * @param filename
1110      * @return
1111      */
underscoreToHypen(String filename)1112     private String underscoreToHypen(String filename) {
1113         return filename.replaceAll("_", "-");
1114     }
1115 
1116     /**
1117      * Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or
1118      * data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia)
1119      *
1120      * @param locale
1121      * @return
1122      */
unicodeLocaleToString(String locale)1123     private final String unicodeLocaleToString(String locale) {
1124         if (strictBcp47) {
1125             return CLDRLocale.toLanguageTag(locale);
1126         } else {
1127             return underscoreToHypen(locale);
1128         }
1129     }
1130 
1131     Pattern IS_REGION_CODE = PatternCache.get("([A-Z][A-Z])|([0-9][0-9][0-9])");
1132     /**
1133      * Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or
1134      * data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia)
1135      * Differs from unicodeLocaleToString in that it will preserve all uppercase region ids
1136      *
1137      * @param locale
1138      * @return
1139      */
unicodeLocaleMishMashToString(String locale)1140     private final String unicodeLocaleMishMashToString(String locale) {
1141         if (strictBcp47) {
1142             if (IS_REGION_CODE.matcher(locale).matches()) {
1143                 return locale;
1144             } else {
1145                 return CLDRLocale.toLanguageTag(locale);
1146             }
1147         } else {
1148             return underscoreToHypen(locale);
1149         }
1150     }
1151 
1152     /**
1153      * Fixup a path to be BCP47 compliant
1154      *
1155      * @param path XPath (usually ends in elementName, but not necessarily)
1156      * @param elementName element to fixup
1157      * @param attributeNames list of attributes to fix
1158      * @return new path
1159      */
fixXpathBcp47(final String path, String elementName, String... attributeNames)1160     final String fixXpathBcp47(final String path, String elementName, String... attributeNames) {
1161         final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed();
1162         for (final String attributeName : attributeNames) {
1163             final String oldValue = xpp.findAttributeValue(elementName, attributeName);
1164             if (oldValue == null) continue;
1165             final String oldValues[] = oldValue.split(" ");
1166             String newValue =
1167                     Arrays.stream(oldValues)
1168                             .map((String s) -> unicodeLocaleToString(s))
1169                             .collect(Collectors.joining(" "));
1170             if (!oldValue.equals(newValue)) {
1171                 xpp.setAttribute(elementName, attributeName, newValue);
1172                 logger.finest(attributeName + " = " + oldValue + " -> " + newValue);
1173             }
1174         }
1175         return xpp.toString();
1176     }
1177 
1178     /**
1179      * Fixup a path to be BCP47 compliant …but support a mishmash of regions and locale ids
1180      * CLDR-15069
1181      *
1182      * @param path XPath (usually ends in elementName, but not necessarily)
1183      * @param elementName element to fixup
1184      * @param attributeNames list of attributes to fix
1185      * @return new path
1186      */
fixXpathBcp47MishMash( final String path, String elementName, String... attributeNames)1187     final String fixXpathBcp47MishMash(
1188             final String path, String elementName, String... attributeNames) {
1189         final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed();
1190         for (final String attributeName : attributeNames) {
1191             final String oldValue = xpp.findAttributeValue(elementName, attributeName);
1192             if (oldValue == null) continue;
1193             final String oldValues[] = oldValue.split(" ");
1194             String newValue =
1195                     Arrays.stream(oldValues)
1196                             .map((String s) -> unicodeLocaleMishMashToString(s))
1197                             .collect(Collectors.joining(" "));
1198             if (!oldValue.equals(newValue)) {
1199                 xpp.setAttribute(elementName, attributeName, newValue);
1200                 logger.finest(attributeName + " = " + oldValue + " -> " + newValue);
1201             }
1202         }
1203         return xpp.toString();
1204     }
1205 
outputUnitPreferenceData( JSONSection js, List<CldrItem> theItems, JsonObject out, ArrayList<CldrNode> nodesForLastItem)1206     private void outputUnitPreferenceData(
1207             JSONSection js,
1208             List<CldrItem> theItems,
1209             JsonObject out,
1210             ArrayList<CldrNode> nodesForLastItem)
1211             throws ParseException, IOException {
1212         // handle these specially.
1213         // redo earlier loop somewhat.
1214         CldrNode supplementalNode = CldrNode.createNode("cldr", "supplemental", "supplemental");
1215         JsonElement supplementalObject = startNonleafNode(out, supplementalNode);
1216         CldrNode unitPrefNode = CldrNode.createNode("supplemental", js.section, js.section);
1217         final JsonElement o = startNonleafNode(supplementalObject, unitPrefNode);
1218 
1219         // We'll directly write to 'out'
1220 
1221         // Unit preference sorting is a bit more complicated, so we're going to use the CldrItems,
1222         // but collect the results more directly.
1223 
1224         Map<Pair<String, String>, Map<String, List<CldrItem>>> catUsagetoRegionItems =
1225                 new TreeMap<>();
1226 
1227         for (CldrItem item : theItems) {
1228             if (!item.getUntransformedPath().contains("unitPref")) {
1229                 continue;
1230             }
1231             CldrItem[] items = item.split();
1232             if (items == null) {
1233                 throw new IllegalArgumentException("expected unit pref to split: " + item);
1234             }
1235             for (final CldrItem subItem : items) {
1236                 // step 1: make sure the category/usage is there
1237                 final XPathParts xpp = XPathParts.getFrozenInstance(subItem.getPath());
1238                 final String category = xpp.findFirstAttributeValue("category");
1239                 final String usage = xpp.findFirstAttributeValue("usage");
1240                 final String region =
1241                         xpp.findFirstAttributeValue("regions"); // actually one region (split)
1242                 Pair<String, String> key = Pair.of(category, usage);
1243                 Map<String, List<CldrItem>> regionMap =
1244                         catUsagetoRegionItems.computeIfAbsent(key, ignored -> new TreeMap<>());
1245                 List<CldrItem> perRegion =
1246                         regionMap.computeIfAbsent(region, ignored -> new ArrayList<>());
1247                 perRegion.add(subItem);
1248             }
1249         }
1250 
1251         // OK, now start outputting
1252         // Traverse categories/usage/regions
1253         // unitPreferenceData is already open {
1254         catUsagetoRegionItems.keySet().stream()
1255                 .map(p -> p.getFirst())
1256                 .distinct() // for each category
1257                 .forEach(
1258                         category -> {
1259                             JsonObject oo = new JsonObject();
1260                             o.getAsJsonObject().add(category, oo);
1261 
1262                             catUsagetoRegionItems.entrySet().stream()
1263                                     .filter(p -> p.getKey().getFirst().equals(category))
1264                                     .forEach(
1265                                             ent -> {
1266                                                 final String usage = ent.getKey().getSecond();
1267                                                 JsonObject ooo = new JsonObject();
1268                                                 oo.getAsJsonObject().add(usage, ooo);
1269 
1270                                                 ent.getValue()
1271                                                         .forEach(
1272                                                                 (region, list) -> {
1273                                                                     JsonArray array =
1274                                                                             new JsonArray();
1275                                                                     ooo.getAsJsonObject()
1276                                                                             .add(region, array);
1277                                                                     list.forEach(
1278                                                                             item -> {
1279                                                                                 final XPathParts
1280                                                                                         xpp =
1281                                                                                                 XPathParts
1282                                                                                                         .getFrozenInstance(
1283                                                                                                                 item
1284                                                                                                                         .getPath());
1285                                                                                 JsonObject u =
1286                                                                                         new JsonObject();
1287                                                                                 array.add(u);
1288                                                                                 u.addProperty(
1289                                                                                         "unit",
1290                                                                                         item
1291                                                                                                 .getValue());
1292                                                                                 if (xpp
1293                                                                                         .containsAttribute(
1294                                                                                                 "geq")) {
1295                                                                                     u.addProperty(
1296                                                                                             "geq",
1297                                                                                             Double
1298                                                                                                     .parseDouble(
1299                                                                                                             xpp
1300                                                                                                                     .findFirstAttributeValue(
1301                                                                                                                             "geq")));
1302                                                                                 }
1303                                                                             });
1304                                                                 });
1305                                             });
1306                         });
1307 
1308         // Computer, switch to 'automatic' navigation
1309         // We'll let closeNodes take over.
1310         nodesForLastItem.add(unitPrefNode); // unitPreferenceData }
1311     }
1312 
1313     /**
1314      * Creates the packaging files ( i.e. package.json ) for a particular package
1315      *
1316      * @param packageName The name of the installable package
1317      */
writePackagingFiles(String outputDir, String packageName)1318     public void writePackagingFiles(String outputDir, String packageName) throws IOException {
1319         File dir = new File(outputDir.toString());
1320         if (!dir.exists()) {
1321             dir.mkdirs();
1322         }
1323         writePackageJson(outputDir, packageName);
1324         writeBowerJson(outputDir, packageName);
1325         writeReadme(outputDir, packageName);
1326     }
1327 
1328     /** Write the ## License section */
writeCopyrightSection(PrintWriter out)1329     public void writeCopyrightSection(PrintWriter out) {
1330         out.println(
1331                 CldrUtility.getCopyrightMarkdown()
1332                         + "\n"
1333                         + "A copy of the license is included as [LICENSE](./LICENSE).");
1334     }
1335 
1336     /**
1337      * Write the readme fragment from cldr-json-readme.md plus the copyright
1338      *
1339      * @param outf
1340      * @throws IOException
1341      */
writeReadmeSection(PrintWriter outf)1342     private void writeReadmeSection(PrintWriter outf) throws IOException {
1343         FileCopier.copy(CldrUtility.getUTF8Data("cldr-json-readme.md"), outf);
1344         outf.println();
1345         writeCopyrightSection(outf);
1346     }
1347 
writeReadme(String outputDir, String packageName)1348     public void writeReadme(String outputDir, String packageName) throws IOException {
1349         final String basePackageName = getBasePackageName(packageName);
1350         try (PrintWriter outf =
1351                 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "README.md"); ) {
1352             outf.println("# " + packageName);
1353             outf.println();
1354             outf.println(configFileReader.getPackageDescriptions().get(basePackageName));
1355             outf.println();
1356             if (packageName.endsWith(FULL_TIER_SUFFIX)) {
1357                 outf.println("This package contains all locales.");
1358                 outf.println();
1359             } else if (packageName.endsWith(MODERN_TIER_SUFFIX)) {
1360                 outf.println(
1361                         "**Deprecated** This package contains only the set of locales listed as modern coverage. Use `"
1362                                 + CLDR_PKG_PREFIX
1363                                 + basePackageName
1364                                 + FULL_TIER_SUFFIX
1365                                 + "` and locale coverage data instead. The -modern packages are scheduled to be removed in v46, see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465).");
1366                 outf.println();
1367             }
1368             outf.println();
1369             outf.println(getNpmBadge(packageName));
1370             outf.println();
1371             writeReadmeSection(outf);
1372         }
1373         try (PrintWriter outf =
1374                 FileUtilities.openUTF8Writer(
1375                         outputDir + "/" + packageName, CldrUtility.LICENSE); ) {
1376             if (licenseFile.isPresent()) {
1377                 try (BufferedReader br = FileUtilities.openUTF8Reader("", licenseFile.get()); ) {
1378                     FileCopier.copy(br, outf);
1379                 }
1380             } else {
1381                 FileCopier.copy(CldrUtility.getUTF8Data(CldrUtility.LICENSE), outf);
1382             }
1383         }
1384     }
1385 
getBasePackageName(final String packageName)1386     String getBasePackageName(final String packageName) {
1387         String basePackageName = packageName;
1388         if (basePackageName.startsWith(CLDR_PKG_PREFIX)) {
1389             basePackageName = basePackageName.substring(CLDR_PKG_PREFIX.length());
1390         }
1391         if (basePackageName.endsWith(FULL_TIER_SUFFIX)) {
1392             basePackageName =
1393                     basePackageName.substring(
1394                             0, basePackageName.length() - FULL_TIER_SUFFIX.length());
1395         } else if (basePackageName.endsWith(MODERN_TIER_SUFFIX)) {
1396             basePackageName =
1397                     basePackageName.substring(
1398                             0, basePackageName.length() - MODERN_TIER_SUFFIX.length());
1399         }
1400         return basePackageName;
1401     }
1402 
writeBasicInfo(JsonObject obj, String packageName, boolean isNPM)1403     public void writeBasicInfo(JsonObject obj, String packageName, boolean isNPM) {
1404         obj.addProperty("name", packageName);
1405         obj.addProperty("version", pkgVersion);
1406 
1407         String[] packageNameParts = packageName.split("-");
1408         String dependency = dependencies.get(packageNameParts[1]);
1409         if (dependency != null) {
1410             String[] dependentPackageNames = new String[1];
1411             String tier = packageNameParts[packageNameParts.length - 1];
1412             if (dependency.equals("core") || dependency.equals("bcp47")) {
1413                 dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency;
1414             } else {
1415                 dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency + "-" + tier;
1416             }
1417 
1418             JsonObject dependencies = new JsonObject();
1419             for (String dependentPackageName : dependentPackageNames) {
1420                 if (dependentPackageName != null) {
1421                     dependencies.addProperty(dependentPackageName, pkgVersion);
1422                 }
1423             }
1424             obj.add(isNPM ? "peerDependencies" : "dependencies", dependencies);
1425         }
1426     }
1427 
1428     /**
1429      * Default for version string
1430      *
1431      * @return
1432      */
getDefaultVersion()1433     private static String getDefaultVersion() {
1434         String versionString = CLDRFile.GEN_VERSION;
1435         while (versionString.split("\\.").length < 3) {
1436             versionString = versionString + ".0";
1437         }
1438         return versionString;
1439     }
1440 
writePackageJson(String outputDir, String packageName)1441     public void writePackageJson(String outputDir, String packageName) throws IOException {
1442         PrintWriter outf =
1443                 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "package.json");
1444         logger.fine(
1445                 PACKAGE_ICON
1446                         + " Creating packaging file => "
1447                         + outputDir
1448                         + File.separator
1449                         + packageName
1450                         + File.separator
1451                         + "package.json");
1452         JsonObject obj = new JsonObject();
1453         writeBasicInfo(obj, packageName, true);
1454 
1455         JsonArray maintainers = new JsonArray();
1456         JsonObject primaryMaintainer = new JsonObject();
1457         JsonObject secondaryMaintainer = new JsonObject();
1458 
1459         final String basePackageName = getBasePackageName(packageName);
1460         String description = configFileReader.getPackageDescriptions().get(basePackageName);
1461         if (packageName.endsWith(MODERN_TIER_SUFFIX)) {
1462             description = description + " (modern only: deprecated)";
1463         }
1464         obj.addProperty("description", description);
1465 
1466         obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE);
1467         obj.addProperty("author", CLDRURLS.UNICODE_CONSORTIUM);
1468 
1469         primaryMaintainer.addProperty("name", "Steven R. Loomis");
1470         primaryMaintainer.addProperty("email", "srloomis@unicode.org");
1471 
1472         maintainers.add(primaryMaintainer);
1473 
1474         secondaryMaintainer.addProperty("name", "John Emmons");
1475         secondaryMaintainer.addProperty("email", "emmo@us.ibm.com");
1476         secondaryMaintainer.addProperty("url", "https://github.com/JCEmmons");
1477 
1478         maintainers.add(secondaryMaintainer);
1479         obj.add("maintainers", maintainers);
1480 
1481         JsonObject repository = new JsonObject();
1482         repository.addProperty("type", "git");
1483         repository.addProperty("url", "git://github.com/unicode-cldr/cldr-json.git");
1484         obj.add("repository", repository);
1485 
1486         obj.addProperty("license", CLDRURLS.UNICODE_SPDX);
1487         obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL);
1488 
1489         final SupplementalDataInfo sdi = CLDRConfig.getInstance().getSupplementalDataInfo();
1490         obj.addProperty("cldrVersion", sdi.getCldrVersionString());
1491         obj.addProperty("unicodeVersion", sdi.getUnicodeVersionString());
1492 
1493         outf.println(gson.toJson(obj));
1494         outf.close();
1495     }
1496 
writeBowerJson(String outputDir, String packageName)1497     public void writeBowerJson(String outputDir, String packageName) throws IOException {
1498         PrintWriter outf =
1499                 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "bower.json");
1500         logger.fine(
1501                 PACKAGE_ICON
1502                         + " Creating packaging file => "
1503                         + outputDir
1504                         + File.separator
1505                         + packageName
1506                         + File.separator
1507                         + "bower.json");
1508         JsonObject obj = new JsonObject();
1509         writeBasicInfo(obj, packageName, false);
1510         if (type == RunType.supplemental) {
1511             JsonArray mainPaths = new JsonArray();
1512             mainPaths.add(new JsonPrimitive("availableLocales.json"));
1513             mainPaths.add(new JsonPrimitive("defaultContent.json")); // Handled specially
1514             mainPaths.add(new JsonPrimitive("scriptMetadata.json"));
1515             mainPaths.add(new JsonPrimitive(type.toString() + "/*.json"));
1516             obj.add("main", mainPaths);
1517         } else if (type == RunType.rbnf) {
1518             obj.addProperty("main", type.toString() + "/*.json");
1519         } else {
1520             obj.addProperty("main", type.toString() + "/**/*.json");
1521         }
1522 
1523         JsonArray ignorePaths = new JsonArray();
1524         ignorePaths.add(new JsonPrimitive(".gitattributes"));
1525         ignorePaths.add(new JsonPrimitive("README.md"));
1526         obj.add("ignore", ignorePaths);
1527         obj.addProperty("license", CLDRURLS.UNICODE_SPDX);
1528 
1529         outf.println(gson.toJson(obj));
1530         outf.close();
1531     }
1532 
writeDefaultContent(String outputDir)1533     public void writeDefaultContent(String outputDir) throws IOException {
1534         PrintWriter outf =
1535                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "defaultContent.json");
1536         System.out.println(
1537                 PACKAGE_ICON
1538                         + " Creating packaging file => "
1539                         + outputDir
1540                         + "/cldr-core"
1541                         + File.separator
1542                         + "defaultContent.json");
1543         JsonObject obj = new JsonObject();
1544         obj.add("defaultContent", gson.toJsonTree(skippedDefaultContentLocales));
1545         outf.println(gson.toJson(obj));
1546         outf.close();
1547     }
1548 
writeTransformMetadata(String outputDir)1549     public void writeTransformMetadata(String outputDir) throws IOException {
1550         final String dirName = outputDir + "/cldr-" + RunType.transforms.name();
1551         final String fileName = RunType.transforms.name() + ".json";
1552         PrintWriter outf = FileUtilities.openUTF8Writer(dirName, fileName);
1553         System.out.println(
1554                 PACKAGE_ICON
1555                         + " Creating packaging file => "
1556                         + dirName
1557                         + File.separator
1558                         + fileName);
1559         JsonObject obj = new JsonObject();
1560         obj.add(
1561                 RunType.transforms.name(),
1562                 gson.toJsonTree(CLDRTransforms.getInstance().getJsonIndex()));
1563         outf.println(gson.toJson(obj));
1564         outf.close();
1565     }
1566 
writeCoverageLevels(String outputDir)1567     public void writeCoverageLevels(String outputDir) throws IOException {
1568         try (PrintWriter outf =
1569                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "coverageLevels.json"); ) {
1570             final Map<String, String> covlocs = new TreeMap<>();
1571             System.out.println(
1572                     PACKAGE_ICON
1573                             + " Creating packaging file => "
1574                             + outputDir
1575                             + "/cldr-core"
1576                             + File.separator
1577                             + "coverageLevels.json from coverageLevels.txt");
1578             CalculatedCoverageLevels ccl = CalculatedCoverageLevels.getInstance();
1579             for (final Map.Entry<String, org.unicode.cldr.util.Level> e :
1580                     ccl.getLevels().entrySet()) {
1581                 final String uloc = e.getKey();
1582                 final String level = e.getValue().name().toLowerCase();
1583                 final String bcp47loc = unicodeLocaleToString(uloc);
1584                 if (covlocs.put(bcp47loc, level) != null) {
1585                     throw new IllegalArgumentException(
1586                             "coverageLevels.txt: duplicate locale " + bcp47loc);
1587                 }
1588             }
1589             final Map<String, String> effectiveCovlocs = new TreeMap<>();
1590             avl.full.forEach(
1591                     loc -> {
1592                         final String uloc = ULocale.forLanguageTag(loc).toString();
1593                         final Level lev = ccl.getEffectiveCoverageLevel(uloc);
1594                         if (lev != null) {
1595                             effectiveCovlocs.put(loc, lev.name().toLowerCase());
1596                         }
1597                     });
1598             JsonObject obj = new JsonObject();
1599             // exactly what is in CLDR .txt file
1600             obj.add("coverageLevels", gson.toJsonTree(covlocs));
1601 
1602             // resolved, including all available locales
1603             obj.add("effectiveCoverageLevels", gson.toJsonTree(effectiveCovlocs));
1604             outf.println(gson.toJson(obj));
1605         }
1606     }
1607 
writeAvailableLocales(String outputDir)1608     public void writeAvailableLocales(String outputDir) throws IOException {
1609         PrintWriter outf =
1610                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "availableLocales.json");
1611         System.out.println(
1612                 PACKAGE_ICON
1613                         + " Creating packaging file => "
1614                         + outputDir
1615                         + "/cldr-core"
1616                         + File.separator
1617                         + "availableLocales.json");
1618         JsonObject obj = new JsonObject();
1619         obj.add("availableLocales", gson.toJsonTree(avl));
1620         outf.println(gson.toJson(obj));
1621         outf.close();
1622     }
1623 
writeScriptMetadata(String outputDir)1624     public void writeScriptMetadata(String outputDir) throws IOException {
1625         PrintWriter outf =
1626                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "scriptMetadata.json");
1627         System.out.println(
1628                 "Creating script metadata file => "
1629                         + outputDir
1630                         + File.separator
1631                         + "cldr-core"
1632                         + File.separator
1633                         + "scriptMetadata.json");
1634         Map<String, Info> scriptInfo = new TreeMap<>();
1635         for (String script : ScriptMetadata.getScripts()) {
1636             Info i = ScriptMetadata.getInfo(script);
1637             scriptInfo.put(script, i);
1638         }
1639         if (ScriptMetadata.errors.size() > 0) {
1640             System.err.println(Joiner.on("\n\t").join(ScriptMetadata.errors));
1641             // throw new IllegalArgumentException();
1642         }
1643 
1644         JsonObject obj = new JsonObject();
1645         obj.add("scriptMetadata", gson.toJsonTree(scriptInfo));
1646         outf.println(gson.toJson(obj));
1647         outf.close();
1648     }
1649 
writePackageList(String outputDir)1650     public void writePackageList(String outputDir) throws IOException {
1651         final boolean includeModern = Boolean.parseBoolean(options.get("Modern").getValue());
1652         PrintWriter outf =
1653                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "cldr-packages.json");
1654         System.out.println(
1655                 PACKAGE_ICON
1656                         + " Creating packaging metadata file => "
1657                         + outputDir
1658                         + File.separator
1659                         + "cldr-core"
1660                         + File.separator
1661                         + "cldr-packages.json and PACKAGES.md");
1662         PrintWriter pkgs = FileUtilities.openUTF8Writer(outputDir + "/..", "PACKAGES.md");
1663 
1664         pkgs.println("# CLDR JSON Packages");
1665         pkgs.println();
1666 
1667         LdmlConfigFileReader uberReader = new LdmlConfigFileReader();
1668 
1669         for (RunType r : RunType.values()) {
1670             if (r == RunType.all) continue;
1671             uberReader.read(null, r);
1672         }
1673 
1674         TreeMap<String, String> pkgsToDesc = new TreeMap<>();
1675 
1676         JsonObject obj = new JsonObject();
1677         obj.addProperty("license", CLDRURLS.UNICODE_SPDX);
1678         obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL);
1679         obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE);
1680         obj.addProperty("version", pkgVersion);
1681 
1682         JsonArray packages = new JsonArray();
1683         for (Map.Entry<String, String> e : uberReader.getPackageDescriptions().entrySet()) {
1684             final String baseName = e.getKey();
1685 
1686             if (baseName.equals("IGNORE") || baseName.equals("cal")) continue;
1687             if (baseName.equals("core") || baseName.equals("rbnf") || baseName.equals("bcp47")) {
1688                 JsonObject packageEntry = new JsonObject();
1689                 packageEntry.addProperty("description", e.getValue());
1690                 packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName);
1691                 packages.add(packageEntry);
1692                 pkgsToDesc.put(
1693                         packageEntry.get("name").getAsString(),
1694                         packageEntry.get("description").getAsString());
1695             } else {
1696                 {
1697                     JsonObject packageEntry = new JsonObject();
1698                     packageEntry.addProperty("description", e.getValue());
1699                     packageEntry.addProperty("tier", "full");
1700                     packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName + FULL_TIER_SUFFIX);
1701                     packages.add(packageEntry);
1702                     pkgsToDesc.put(
1703                             packageEntry.get("name").getAsString(),
1704                             packageEntry.get("description").getAsString());
1705                 }
1706                 if (includeModern) {
1707                     JsonObject packageEntry = new JsonObject();
1708                     packageEntry.addProperty("description", e.getValue() + " modern (deprecated)");
1709                     packageEntry.addProperty("tier", "modern");
1710                     packageEntry.addProperty(
1711                             "name", CLDR_PKG_PREFIX + baseName + MODERN_TIER_SUFFIX);
1712                     packages.add(packageEntry);
1713                     pkgsToDesc.put(
1714                             packageEntry.get("name").getAsString(),
1715                             packageEntry.get("description").getAsString());
1716                 }
1717             }
1718         }
1719         pkgs.println();
1720         for (Map.Entry<String, String> e : pkgsToDesc.entrySet()) {
1721             pkgs.println("### [" + e.getKey() + "](./cldr-json/" + e.getKey() + "/)");
1722             pkgs.println();
1723             if (e.getKey().contains("-modern")) {
1724                 pkgs.println(
1725                         " - **Note: Deprecated** see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465).");
1726             }
1727             pkgs.println(" - " + e.getValue());
1728             pkgs.println(" - " + getNpmBadge(e.getKey()));
1729             pkgs.println();
1730         }
1731         obj.add("packages", packages);
1732         outf.println(gson.toJson(obj));
1733         outf.close();
1734         pkgs.println("## JSON Metadata");
1735         pkgs.println();
1736         pkgs.println(
1737                 "Package metadata is available at [`cldr-core`/cldr-packages.json](./cldr-json/cldr-core/cldr-packages.json)");
1738         pkgs.println();
1739 
1740         writeReadmeSection(pkgs);
1741         pkgs.close();
1742     }
1743 
getNpmBadge(final String packageName)1744     private String getNpmBadge(final String packageName) {
1745         return String.format(
1746                 "[![NPM version](https://img.shields.io/npm/v/%s.svg?style=flat)](https://www.npmjs.org/package/%s)",
1747                 packageName, packageName);
1748     }
1749 
1750     /**
1751      * Process the pending sorting items.
1752      *
1753      * @param out The ArrayList to hold all output lines.
1754      * @param nodesForLastItem All the nodes from last item.
1755      * @param sortingItems The item list that should be sorted before output.
1756      * @throws IOException
1757      * @throws ParseException
1758      */
resolveSortingItems( JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems)1759     private void resolveSortingItems(
1760             JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems)
1761             throws IOException, ParseException {
1762         ArrayList<CldrItem> arrayItems = new ArrayList<>();
1763         String lastLeadingArrayItemPath = null;
1764 
1765         if (!sortingItems.isEmpty()) {
1766             Collections.sort(sortingItems);
1767             for (CldrItem item : sortingItems) {
1768                 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath());
1769                 if (matcher.matches()) {
1770                     String leadingArrayItemPath = matcher.group(1);
1771                     if (lastLeadingArrayItemPath != null
1772                             && !lastLeadingArrayItemPath.equals(leadingArrayItemPath)) {
1773                         resolveArrayItems(out, nodesForLastItem, arrayItems);
1774                     }
1775                     lastLeadingArrayItemPath = leadingArrayItemPath;
1776                     arrayItems.add(item);
1777                 } else {
1778                     outputCldrItem(out, nodesForLastItem, item);
1779                 }
1780             }
1781             sortingItems.clear();
1782             resolveArrayItems(out, nodesForLastItem, arrayItems);
1783         }
1784     }
1785 
    /**
     * Process the pending array items: output them as one JSON array and clear the list.
     *
     * <p>Does nothing when {@code arrayItems} is empty. Otherwise the items are sorted if the
     * first one reports {@code needsSort()}, a JSON array is created via {@link
     * #outputStartArray}, every item is passed to {@code outputArrayItem}, and {@code arrayItems}
     * is cleared. {@code nodesForLastItem} is modified in place along the way.
     *
     * @param out The JSON object that receives the output.
     * @param nodesForLastItem All the nodes from last item.
     * @param arrayItems The item list that should be output as array.
     * @throws IOException
     * @throws ParseException
     */
    private void resolveArrayItems(
            JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems)
            throws IOException, ParseException {
        if (!arrayItems.isEmpty()) {
            CldrItem firstItem = arrayItems.get(0);
            if (firstItem.needsSort()) {
                Collections.sort(arrayItems);
                // sorting may have moved a different item to the front
                firstItem = arrayItems.get(0);
            }

            // Level of the array within the path; used to create the array, passed to each
            // outputArrayItem call, and used to trim nodesForLastItem below.
            int arrayLevel = getArrayIndentLevel(firstItem);

            JsonArray array = outputStartArray(out, nodesForLastItem, firstItem, arrayLevel);

            // Previous statement closed for first element, trim nodesForLastItem
            // so that it will not happen again inside.
            int len = nodesForLastItem.size();
            while (len > arrayLevel) {
                nodesForLastItem.remove(len - 1);
                len--;
            }
            for (CldrItem insideItem : arrayItems) {
                outputArrayItem(array, insideItem, nodesForLastItem, arrayLevel);
            }
            arrayItems.clear();

            int lastLevel = nodesForLastItem.size() - 1;
            // closeNodes(out, lastLevel, arrayLevel);
            // out.endArray();
            // NOTE(review): this removes by a rising index while the list shrinks and lastLevel
            // stays fixed, so it does not remove a contiguous range. It runs only if
            // outputArrayItem left more than arrayLevel entries in nodesForLastItem; it would
            // throw for arrayLevel == 0 (index -1) or when the index passes the shrinking size.
            // Presumably at most one entry is removed in practice. TODO confirm against
            // outputArrayItem.
            for (int i = arrayLevel - 1; i < lastLevel; i++) {
                nodesForLastItem.remove(i);
            }
        }
    }
1829 
1830     /**
1831      * Find the indent level on which array should be inserted.
1832      *
1833      * @param item The CldrItem being examined.
1834      * @return The array indent level.
1835      * @throws ParseException
1836      */
getArrayIndentLevel(CldrItem item)1837     private int getArrayIndentLevel(CldrItem item) throws ParseException {
1838         Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath());
1839         if (!matcher.matches()) {
1840             System.out.println("No match found for " + item.getPath() + ", this shouldn't happen.");
1841             return 0;
1842         }
1843 
1844         String leadingPath = matcher.group(1);
1845         CldrItem fakeItem = new CldrItem(leadingPath, leadingPath, leadingPath, leadingPath, "");
1846         return fakeItem.getNodesInPath().size() - 1;
1847     }
1848 
1849     /**
1850      * Write the start of an array.
1851      *
1852      * @param out The root object
1853      * @param nodesForLastItem Nodes in path for last CldrItem.
1854      * @param item The CldrItem to be processed.
1855      * @param arrayLevel The level on which array is laid out.
1856      * @throws IOException
1857      * @throws ParseException
1858      */
outputStartArray( JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)1859     private JsonArray outputStartArray(
1860             JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)
1861             throws IOException, ParseException {
1862 
1863         ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
1864 
1865         JsonElement o = out;
1866 
1867         // final CldrNode last = nodesInPath.get(nodesInPath.size()-1);
1868 
1869         // Output nodes up to parent of 'arrayLevel'
1870         for (int i = 1; i < arrayLevel - 1; i++) {
1871             final CldrNode node = nodesInPath.get(i);
1872             o = startNonleafNode(o, node);
1873         }
1874 
1875         // at arrayLevel, we have a named Array.
1876         // Get the name of the parent of the array
1877         String objName = nodesInPath.get(arrayLevel - 1).getNodeKeyName();
1878         JsonArray array = new JsonArray();
1879         o.getAsJsonObject().add(objName, array);
1880 
1881         return array;
1882     }
1883 
1884     /**
1885      * Write a CLDR item to file.
1886      *
1887      * <p>"usesMetazone" will be checked to see if it is current. Those non-current item will be
1888      * dropped.
1889      *
1890      * @param out The ArrayList to hold all output lines.
1891      * @param nodesForLastItem
1892      * @param item The CldrItem to be processed.
1893      * @throws IOException
1894      * @throws ParseException
1895      */
outputCldrItem(JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item)1896     private void outputCldrItem(JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item)
1897             throws IOException, ParseException {
1898         // alias has been resolved, no need to keep it.
1899         if (item.isAliasItem()) {
1900             return;
1901         }
1902 
1903         ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
1904         int arraySize = nodesInPath.size();
1905 
1906         int i = 0;
1907         if (i == nodesInPath.size() && type != RunType.rbnf) {
1908             System.err.println(
1909                     "This nodes and last nodes has identical path. ("
1910                             + item.getPath()
1911                             + ") Some distinguishing attributes wrongly removed?");
1912             return;
1913         }
1914 
1915         // close previous nodes
1916         // closeNodes(out, nodesForLastItem.size() - 2, i);
1917         JsonElement o = out;
1918         for (; i < nodesInPath.size() - 1; ++i) {
1919             o = startNonleafNode(o, nodesInPath.get(i));
1920         }
1921 
1922         writeLeafNode(o, nodesInPath.get(i), item.getValue());
1923         nodesForLastItem.clear();
1924         nodesForLastItem.addAll(nodesInPath);
1925     }
1926 
1927     /**
1928      * Start a non-leaf node, adding it if not there.
1929      *
1930      * @param out The input JsonObject
1931      * @param node The node being written.
1932      * @throws IOException
1933      */
startNonleafNode(JsonElement out, final CldrNode node)1934     private JsonElement startNonleafNode(JsonElement out, final CldrNode node) throws IOException {
1935         String objName = node.getNodeKeyName();
1936         // Some node should be skipped as indicated by objName being null.
1937         logger.finest(() -> "objName= " + objName + " for path " + node.getUntransformedPath());
1938         if (objName == null
1939                 || objName.equals("cldr")
1940                 || objName.equals("ldmlBCP47")) { // Skip root 'cldr' node
1941             return out;
1942         }
1943 
1944         Map<String, String> attrAsValueMap = node.getAttrAsValueMap();
1945 
1946         String name;
1947 
1948         if (type == RunType.annotations || type == RunType.annotationsDerived) {
1949             if (objName.startsWith("U+")) {
1950                 // parse U+22 -> "   etc
1951                 name = (com.ibm.icu.text.UTF16.valueOf(Integer.parseInt(objName.substring(2), 16)));
1952             } else {
1953                 name = (objName);
1954             }
1955         } else {
1956             name = (objName);
1957         }
1958 
1959         JsonElement o = out.getAsJsonObject().get(name);
1960 
1961         if (o == null) {
1962             o = new JsonObject();
1963             out.getAsJsonObject().add(name, o);
1964         }
1965 
1966         for (final String key : attrAsValueMap.keySet()) {
1967             logger.finest(() -> "Non-Leaf Node: " + node.getUntransformedPath() + " ." + key);
1968             String rawAttrValue = attrAsValueMap.get(key);
1969             String value = escapeValue(rawAttrValue);
1970             // attribute is prefixed with "_" when being used as key.
1971             String attrAsKey = "_" + key;
1972             if (LdmlConvertRules.attrIsBooleanOmitFalse(
1973                     node.getUntransformedPath(), node.getName(), node.getParent(), key)) {
1974                 final Boolean v = Boolean.parseBoolean(rawAttrValue);
1975                 if (v) {
1976                     o.getAsJsonObject().addProperty(attrAsKey, v);
1977                 } // else, omit
1978             } else {
1979                 // hack for localeRules
1980                 if (attrAsKey.equals("_localeRules")) {
1981                     // find the _localeRules object, add if it didn't exist
1982                     JsonElement localeRules = out.getAsJsonObject().get(attrAsKey);
1983                     if (localeRules == null) {
1984                         localeRules = new JsonObject();
1985                         out.getAsJsonObject().add(attrAsKey, localeRules);
1986                     }
1987                     // find the sibling object, add if it did't exist ( this will be parentLocale or
1988                     // collations etc.)
1989                     JsonElement sibling = localeRules.getAsJsonObject().get(name);
1990                     if (sibling == null) {
1991                         sibling = new JsonObject();
1992                         localeRules.getAsJsonObject().add(name, sibling);
1993                     }
1994                     // get the 'parent' attribute, which wil be the value
1995                     final String parent =
1996                             XPathParts.getFrozenInstance(node.getUntransformedPath())
1997                                     .getAttributeValue(-1, "parent");
1998                     // finally, we add something like "nonLikelyScript: und"
1999                     sibling.getAsJsonObject().addProperty(value, parent);
2000                 } else {
2001                     o.getAsJsonObject().addProperty(attrAsKey, value);
2002                 }
2003             }
2004         }
2005         return o;
2006     }
2007 
    /**
     * Append one CLDR item to a JSON array.
     *
     * <p>Which branch runs depends on where the item's last path node sits relative to {@code
     * arrayLevel}:
     *
     * <ul>
     *   <li>{@code arrayLevel == nodesInPath.size() - 1}: the last node is itself the array item.
     *       Without attributes it becomes a one-entry object {@code {name: value}}; an "rbnfrule"
     *       node with attributes is written by {@code writeRbnfLeafNode}; any other node with
     *       attributes is written through {@code writeLeafNode} into a fresh object. The last node
     *       is then removed from the list returned by {@code item.getNodesInPath()}.
     *   <li>otherwise: the leaf lies below the array item. This branch logs "PROBLEM at ..." to
     *       stderr and carries a TODO in the code.
     * </ul>
     *
     * <p>In both branches any "-suffix" in the node key name is stripped, and {@code
     * nodesForLastItem} is replaced by this item's nodes before returning.
     *
     * @param out The JsonArray that receives the new element.
     * @param item The CldrItem to be processed.
     * @param nodesForLastItem Nodes in path for last item; overwritten with this item's nodes.
     * @param arrayLevel Index, within the item's node path, of the array item node.
     * @throws IOException
     * @throws ParseException
     */
    private void outputArrayItem(
            JsonArray out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel)
            throws IOException, ParseException {

        // This method is more complicated than outputCldrItem because it needs to
        // handle 3 different cases.
        // 1. When difference is found below array item, this item will be of the
        // same array item. Inside the array item, it is about the same as
        // outputCldrItem, just with one more level of indentation because of
        // the array.
        // 2. The array item is the leaf item with no attribute, simplify it as
        // an object with one name/value pair.
        // 3. The array item is the leaf item with attribute, an embedded object
        // will be created inside the array item object.
        //
        // NOTE(review): the "case 2" / "case 3" labels in the code below do not line up with
        // this list: the 'if' branch covers both leaf situations (2 and 3 above), while the
        // 'else' branch is reached when the leaf is below the array item (1 above).

        ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
        // The value is escaped here; the writeLeafNode overloads called below escape it again.
        String value = escapeValue(item.getValue());
        int nodesNum = nodesInPath.size();

        // case 1 (disabled; kept for reference)
        // int diff = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath);
        // The last node of the path, i.e. the leaf that carries the value.
        CldrNode cldrNode = nodesInPath.get(nodesNum - 1);

        // if (diff > arrayLevel) {
        //     // close previous nodes
        //     closeNodes(out, nodesForLastItem.size() - 1, diff + 1);

        //     for (int i = diff; i < nodesNum - 1; i++) {
        //         startNonleafNode(out, nodesInPath.get(i), i + 1);
        //     }
        //     writeLeafNode(out, cldrNode, value, nodesNum);
        //     return;
        // }

        if (arrayLevel == nodesNum - 1) {
            // case 2: the leaf node is the array item itself.
            // close previous nodes
            // if (nodesForLastItem.size() - 1 - arrayLevel > 0) {
            //     closeNodes(out, nodesForLastItem.size() - 1, arrayLevel);
            // }

            // Drop anything from the first '-' onwards in the key name.
            String objName = cldrNode.getNodeKeyName();
            int pos = objName.indexOf('-');
            if (pos > 0) {
                objName = objName.substring(0, pos);
            }

            Map<String, String> attrAsValueMap = cldrNode.getAttrAsValueMap();

            if (attrAsValueMap.isEmpty()) {
                // No attributes: the array element is simply {objName: value}.
                JsonObject o = new JsonObject();
                out.add(o);
                o.addProperty(objName, value);
            } else if (objName.equals("rbnfrule")) {
                // RBNF rules are appended as a two-element [key, value] array.
                writeRbnfLeafNode(out, item, attrAsValueMap);
            } else {
                // Attributes present: let writeLeafNode build the embedded object, then append.
                JsonObject o = new JsonObject();
                writeLeafNode(
                        o,
                        objName,
                        attrAsValueMap,
                        value,
                        cldrNode.getName(),
                        cldrNode.getParent(),
                        cldrNode);
                out.add(o);
            }
            // the last node is closed, remove it.
            nodesInPath.remove(nodesNum - 1);
        } else {
            // case 3: the leaf is below the array item.
            // close previous nodes
            // if (nodesForLastItem.size() - 1 - (arrayLevel) > 0) {
            //     closeNodes(out, nodesForLastItem.size() - 1, arrayLevel);
            // }

            // A new array element holding one object named after the array item node.
            JsonObject o = new JsonObject();
            out.add(o);

            CldrNode node = nodesInPath.get(arrayLevel);
            String objName = node.getNodeKeyName();
            int pos = objName.indexOf('-');
            if (pos > 0) {
                objName = objName.substring(0, pos);
            }
            Map<String, String> attrAsValueMap = node.getAttrAsValueMap();
            JsonObject oo = new JsonObject();
            o.add(objName, oo);
            for (String key : attrAsValueMap.keySet()) {
                // attribute is prefixed with "_" when being used as key.
                oo.addProperty("_" + key, escapeValue(attrAsValueMap.get(key)));
            }

            // NOTE(review): o2 starts out as the JsonArray 'out', while startNonleafNode and
            // writeLeafNode call getAsJsonObject() on the element they are given (unless the
            // node's key name is null/skipped). That looks like it would throw for an array -
            // presumably 'oo' was intended; confirm before relying on this branch.
            JsonElement o2 = out;
            System.err.println("PROBLEM at " + cldrNode.getUntransformedPath());
            // TODO ?!!
            for (int i = arrayLevel + 1; i < nodesInPath.size() - 1; i++) {
                o2 = startNonleafNode(o2, nodesInPath.get(i));
            }
            writeLeafNode(o2, cldrNode, value);
        }

        // Remember this item's nodes as the "last item" for the caller.
        nodesForLastItem.clear();
        nodesForLastItem.addAll(nodesInPath);
    }
2126 
writeRbnfLeafNode( JsonElement out, CldrItem item, Map<String, String> attrAsValueMap)2127     private void writeRbnfLeafNode(
2128             JsonElement out, CldrItem item, Map<String, String> attrAsValueMap) throws IOException {
2129         if (attrAsValueMap.size() != 1) {
2130             throw new IllegalArgumentException(
2131                     "Error, attributes seem wrong for RBNF " + item.getUntransformedPath());
2132         }
2133         Entry<String, String> entry = attrAsValueMap.entrySet().iterator().next();
2134         JsonArray arr = new JsonArray();
2135         arr.add(entry.getKey());
2136         arr.add(entry.getValue());
2137         out.getAsJsonArray().add(arr);
2138     }
2139 
progressPrefix( AtomicInteger readCount, int totalCount, String filename, String section)2140     private String progressPrefix(
2141             AtomicInteger readCount, int totalCount, String filename, String section) {
2142         return progressPrefix(readCount.get(), totalCount, filename, section);
2143     }
2144 
progressPrefix(int readCount, int totalCount, String filename, String section)2145     private String progressPrefix(int readCount, int totalCount, String filename, String section) {
2146         return progressPrefix(readCount, totalCount) + filename + "\t" + section + "\t";
2147     }
2148 
progressPrefix(AtomicInteger readCount, int totalCount)2149     private final String progressPrefix(AtomicInteger readCount, int totalCount) {
2150         return progressPrefix(readCount.get(), totalCount);
2151     }
2152 
    /**
     * Formatter for the percentage shown in progress prefixes: English locale, percent unit,
     * integer part zero-filled to three digits, rounded to an integer. progressPrefix(int, int)
     * passes it a value it has already multiplied by 100.
     */
    final LocalizedNumberFormatter percentFormatter =
            NumberFormatter.withLocale(Locale.ENGLISH)
                    .unit(NoUnit.PERCENT)
                    .integerWidth(IntegerWidth.zeroFillTo(3))
                    .precision(Precision.integer());
2158 
progressPrefix(int readCount, int totalCount)2159     private final String progressPrefix(int readCount, int totalCount) {
2160         double asPercent = ((double) readCount / (double) totalCount) * 100.0;
2161         return String.format(
2162                 SECTION_ICON + " %s (step %d/%d)\t[%s]:\t",
2163                 type,
2164                 type.ordinal(),
2165                 RunType.values().length
2166                         - 1, // which 'type' are we on? (all=0, minus one to get the count right)
2167                 percentFormatter.format(asPercent));
2168     }
2169 
2170     /**
2171      * Process files in a directory of CLDR file tree.
2172      *
2173      * @param dirName The directory in which xml file will be transformed.
2174      * @param minimalDraftStatus The minimumDraftStatus that will be accepted.
2175      * @throws IOException
2176      * @throws ParseException
2177      */
processDirectory(String dirName, DraftStatus minimalDraftStatus)2178     public void processDirectory(String dirName, DraftStatus minimalDraftStatus)
2179             throws IOException, ParseException {
2180         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(cldrCommonDir + "supplemental");
2181         Factory cldrFactory = Factory.make(cldrCommonDir + dirName + "/", ".*");
2182         Set<String> files =
2183                 cldrFactory
2184                         .getAvailable()
2185                         // filter these out early so our work count is correct
2186                         .stream()
2187                         .filter(
2188                                 filename ->
2189                                         filename.matches(match)
2190                                                 && !LdmlConvertRules.IGNORE_FILE_SET.contains(
2191                                                         filename))
2192                         .collect(Collectors.toSet());
2193         final int total = files.size();
2194         AtomicInteger readCount = new AtomicInteger(0);
2195         Map<String, Throwable> errs = new TreeMap<>();
2196 
2197         // This takes a long time (minutes, in 2020), so run it in parallel forkJoinPool threads.
2198         // The result of this pipeline is an array of toString()-able filenames of XML files which
2199         // produced no JSON output, just as a warning.
2200         System.out.println(
2201                 progressPrefix(0, total)
2202                         + " "
2203                         + MessageFormat.format(
2204                                 GEAR_ICON
2205                                         + " Beginning parallel process of {0, plural, one {# file} other {# files}}",
2206                                 total));
2207         Object noOutputFiles[] =
2208                 files.parallelStream()
2209                         .unordered()
2210                         .map(
2211                                 filename -> {
2212                                     String pathPrefix;
2213                                     CLDRFile file =
2214                                             cldrFactory.make(
2215                                                     filename,
2216                                                     resolve && type == RunType.main,
2217                                                     minimalDraftStatus);
2218                                     // Print 'reading' after the make, to stagger the output a
2219                                     // little bit.
2220                                     // Otherwise, the printout happens before any work happens, and
2221                                     // is easily out of order.
2222                                     readCount.incrementAndGet();
2223                                     logger.fine(
2224                                             () ->
2225                                                     "<"
2226                                                             + progressPrefix(
2227                                                                     readCount, total, dirName,
2228                                                                     filename)
2229                                                             + "\r");
2230 
2231                                     if (type == RunType.main) {
2232                                         pathPrefix =
2233                                                 "/cldr/"
2234                                                         + dirName
2235                                                         + "/"
2236                                                         + unicodeLocaleToString(filename)
2237                                                         + "/";
2238                                     } else {
2239                                         pathPrefix = "/cldr/" + dirName + "/";
2240                                     }
2241                                     int totalForThisFile = 0;
2242                                     try {
2243                                         totalForThisFile =
2244                                                 convertCldrItems(
2245                                                         readCount,
2246                                                         total,
2247                                                         dirName,
2248                                                         filename,
2249                                                         pathPrefix,
2250                                                         mapPathsToSections(
2251                                                                 readCount,
2252                                                                 total,
2253                                                                 file,
2254                                                                 pathPrefix,
2255                                                                 sdi));
2256                                     } catch (IOException | ParseException t) {
2257                                         t.printStackTrace();
2258                                         System.err.println(
2259                                                 "!"
2260                                                         + progressPrefix(readCount, total)
2261                                                         + filename
2262                                                         + " - err - "
2263                                                         + t);
2264                                         errs.put(filename, t);
2265                                     } finally {
2266                                         logger.fine(
2267                                                 () ->
2268                                                         "."
2269                                                                 + progressPrefix(readCount, total)
2270                                                                 + "Completing "
2271                                                                 + dirName
2272                                                                 + "/"
2273                                                                 + filename);
2274                                     }
2275                                     return new Pair<>(dirName + "/" + filename, totalForThisFile);
2276                                 })
2277                         .filter(p -> p.getSecond() == 0) // filter out only files which produced no
2278                         // output
2279                         .map(p -> p.getFirst())
2280                         .toArray();
2281         System.out.println(
2282                 progressPrefix(total, total)
2283                         + " "
2284                         + DONE_ICON
2285                         + MessageFormat.format(
2286                                 "Completed parallel process of {0, plural, one {# file} other {# files}}",
2287                                 total));
2288         if (noOutputFiles.length > 0) {
2289             System.err.println(
2290                     WARN_ICON
2291                             + MessageFormat.format(
2292                                     " Warning: {0, plural, one {# file} other {# files}} did not produce any output (check JSON config):",
2293                                     noOutputFiles.length));
2294             for (final Object f : noOutputFiles) {
2295                 final String loc = f.toString();
2296                 final String uloc = unicodeLocaleToString(f.toString());
2297                 if (skipBcp47LocalesWithSubtags
2298                         && type.locales()
2299                         && HAS_SUBTAG.matcher(uloc).matches()) {
2300                     System.err.println(
2301                             "\t- " + loc + " ❎ (Skipped due to '-T true': " + uloc + ")");
2302                 } else {
2303                     System.err.println("\t- " + loc);
2304                 }
2305             }
2306         }
2307 
2308         if (!errs.isEmpty()) {
2309             System.err.println("Errors in these files:");
2310             for (Map.Entry<String, Throwable> e : errs.entrySet()) {
2311                 System.err.println(e.getKey() + " - " + e.getValue());
2312             }
2313             // rethrow
2314             for (Map.Entry<String, Throwable> e : errs.entrySet()) {
2315                 if (e.getValue() instanceof IOException) {
2316                     throw (IOException) e.getValue(); // throw the first one
2317                 } else if (e.getValue() instanceof ParseException) {
2318                     throw (ParseException) e.getValue(); // throw the first one
2319                 } else {
2320                     throw new RuntimeException("Other exception thrown: " + e.getValue());
2321                 }
2322                 /* NOTREACHED */
2323             }
2324         }
2325 
2326         if (writePackages) {
2327             for (String currentPackage : packages) {
2328                 writePackagingFiles(outputDir, currentPackage);
2329             }
2330             if (type == RunType.main) {
2331                 writeDefaultContent(outputDir);
2332                 writeAvailableLocales(outputDir);
2333                 writeCoverageLevels(outputDir);
2334             } else if (type == RunType.supplemental) {
2335                 writeScriptMetadata(outputDir);
2336                 if (Boolean.parseBoolean(options.get("packagelist").getValue())) {
2337                     writePackageList(outputDir);
2338                 }
2339             } else if (type == RunType.transforms) {
2340                 writeTransformMetadata(outputDir);
2341             }
2342         }
2343     }
2344 
    /**
     * Pattern used by escapeValue: the regular expression {@code \\(?!u)} matches a single
     * backslash that is not immediately followed by the letter 'u'. Obtained through PatternCache
     * (presumably a cached compile - confirm against PatternCache).
     */
    private static final Pattern escapePattern = PatternCache.get("\\\\(?!u)");
2347 
2348     /**
2349      * Escape \ in value string. \ should be replaced by \\, except in case of \u1234 In following
2350      * code, \\\\ represent one \, because java compiler and regular expression compiler each do one
2351      * round of escape.
2352      *
2353      * @param value Input string.
2354      * @return escaped string.
2355      */
escapeValue(String value)2356     private String escapeValue(String value) {
2357         Matcher match = escapePattern.matcher(value);
2358         String ret = match.replaceAll("\\\\");
2359         return ret.replace("\n", " ").replace("\t", " ");
2360     }
2361 
2362     /**
2363      * Write the value to output.
2364      *
2365      * @param out The ArrayList to hold all output lines.
2366      * @param node The CldrNode being written.
2367      * @param value The value part for this element.
2368      * @param level Indent level.
2369      * @throws IOException
2370      */
writeLeafNode(JsonElement out, CldrNode node, String value)2371     private void writeLeafNode(JsonElement out, CldrNode node, String value) throws IOException {
2372 
2373         String objName = node.getNodeKeyName();
2374         Map<String, String> attrAsValueMaps = node.getAttrAsValueMap();
2375         writeLeafNode(out, objName, attrAsValueMaps, value, node.getName(), node.getParent(), node);
2376     }
2377 
2378     /**
2379      * Write the value to output.
2380      *
2381      * @param out The ArrayList to hold all output lines.
2382      * @param objName The node's node.
2383      * @param attrAsValueMap Those attributes that will be treated as values.
2384      * @param value The value part for this element.
2385      * @param level Indent level.
2386      * @param nodeName the original nodeName (not distinguished)
2387      * @throws IOException
2388      */
writeLeafNode( JsonElement out, String objName, Map<String, String> attrAsValueMap, String value, final String nodeName, String parent, CldrNode node)2389     private void writeLeafNode(
2390             JsonElement out,
2391             String objName,
2392             Map<String, String> attrAsValueMap,
2393             String value,
2394             final String nodeName,
2395             String parent,
2396             CldrNode node)
2397             throws IOException {
2398         if (objName == null) {
2399             return;
2400         }
2401         value = escapeValue(value);
2402 
2403         final boolean valueIsSpacesepArray =
2404                 LdmlConvertRules.valueIsSpacesepArray(nodeName, parent);
2405         if (attrAsValueMap.isEmpty()) {
2406             // out.name(objName);
2407             if (value.isEmpty()) {
2408                 if (valueIsSpacesepArray) {
2409                     // empty value, output as empty space-sep array: []
2410                     out.getAsJsonObject().add(objName, new JsonArray());
2411                 } else {
2412                     // empty value.
2413                     if (objName.endsWith("SpaceReplacement")) { // foreignSpaceReplacement or
2414                         // nativeSpaceReplacement
2415                         out.getAsJsonObject().addProperty(objName, "");
2416                     } else {
2417                         out.getAsJsonObject().add(objName, new JsonObject());
2418                     }
2419                 }
2420             } else if (type == RunType.annotations || type == RunType.annotationsDerived) {
2421                 JsonArray a = new JsonArray();
2422                 // split this, so "a | b | c" becomes ["a","b","c"]
2423                 for (final String s : Annotations.splitter.split(value.trim())) {
2424                     a.add(s);
2425                 }
2426                 out.getAsJsonObject().add(objName, a);
2427             } else if (valueIsSpacesepArray) {
2428                 outputSpaceSepArray(out, objName, value);
2429             } else {
2430                 // normal value
2431                 out.getAsJsonObject().addProperty(objName, value);
2432             }
2433             return;
2434         }
2435 
2436         // If there is no value, but a attribute being treated as value,
2437         // simplify the output.
2438         if (value.isEmpty() && attrAsValueMap.containsKey(LdmlConvertRules.ANONYMOUS_KEY)) {
2439             String v = attrAsValueMap.get(LdmlConvertRules.ANONYMOUS_KEY);
2440             // out.name(objName);
2441             if (valueIsSpacesepArray) {
2442                 outputSpaceSepArray(out, objName, v);
2443             } else {
2444                 out.getAsJsonObject().addProperty(objName, v);
2445             }
2446             return;
2447         }
2448 
2449         JsonObject o = new JsonObject();
2450         out.getAsJsonObject().add(objName, o);
2451 
2452         if (!value.isEmpty()) {
2453             o.addProperty("_value", value);
2454         }
2455 
2456         for (final String key : attrAsValueMap.keySet()) {
2457             String rawAttrValue = attrAsValueMap.get(key);
2458             String attrValue = escapeValue(rawAttrValue);
2459             // attribute is prefixed with "_" when being used as key.
2460             String attrAsKey = "_" + key;
2461             if (node != null) {
2462                 logger.finest(() -> "Leaf Node: " + node.getUntransformedPath() + " ." + key);
2463             }
2464             if (LdmlConvertRules.ATTRVALUE_AS_ARRAY_SET.contains(key)) {
2465                 String[] strings = attrValue.trim().split("\\s+");
2466                 JsonArray a = new JsonArray();
2467                 o.add(attrAsKey, a);
2468                 for (String s : strings) {
2469                     a.add(s);
2470                 }
2471             } else if (node != null
2472                     && LdmlConvertRules.attrIsBooleanOmitFalse(
2473                             node.getUntransformedPath(), nodeName, parent, key)) {
2474                 final Boolean v = Boolean.parseBoolean(rawAttrValue);
2475                 if (v) {
2476                     o.addProperty(attrAsKey, v);
2477                 } // else: omit falsy value
2478             } else {
2479                 o.addProperty(attrAsKey, attrValue);
2480             }
2481         }
2482     }
2483 
outputSpaceSepArray(JsonElement out, String objName, String v)2484     private void outputSpaceSepArray(JsonElement out, String objName, String v) throws IOException {
2485         JsonArray a = new JsonArray();
2486         out.getAsJsonObject().add(objName, a);
2487         // split this, so "a b c" becomes ["a","b","c"]
2488         for (final String s : v.trim().split(" ")) {
2489             if (!s.isEmpty()) {
2490                 a.add(s);
2491             }
2492         }
2493     }
2494 }
2495