• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.json;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.text.ParseException;
7 import java.util.ArrayList;
8 import java.util.Collections;
9 import java.util.HashMap;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Set;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
16 import java.util.concurrent.atomic.AtomicInteger;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 
20 import org.unicode.cldr.draft.FileUtilities;
21 import org.unicode.cldr.draft.ScriptMetadata;
22 import org.unicode.cldr.draft.ScriptMetadata.Info;
23 import org.unicode.cldr.tool.Option.Options;
24 import org.unicode.cldr.util.Annotations;
25 import org.unicode.cldr.util.CLDRConfig;
26 import org.unicode.cldr.util.CLDRFile;
27 import org.unicode.cldr.util.CLDRFile.DraftStatus;
28 import org.unicode.cldr.util.CLDRPaths;
29 import org.unicode.cldr.util.CLDRTool;
30 import org.unicode.cldr.util.CldrUtility;
31 import org.unicode.cldr.util.CoverageInfo;
32 import org.unicode.cldr.util.DtdData;
33 import org.unicode.cldr.util.DtdType;
34 import org.unicode.cldr.util.Factory;
35 import org.unicode.cldr.util.FileProcessor;
36 import org.unicode.cldr.util.Level;
37 import org.unicode.cldr.util.LocaleIDParser;
38 import org.unicode.cldr.util.PatternCache;
39 import org.unicode.cldr.util.StandardCodes;
40 import org.unicode.cldr.util.SupplementalDataInfo;
41 import org.unicode.cldr.util.XPathParts;
42 
43 import com.google.common.base.Joiner;
44 import com.google.gson.Gson;
45 import com.google.gson.GsonBuilder;
46 import com.google.gson.JsonArray;
47 import com.google.gson.JsonObject;
48 import com.google.gson.JsonPrimitive;
49 import com.google.gson.stream.JsonWriter;
50 
51 /**
52  * Utility methods to extract data from CLDR repository and export it in JSON
53  * format.
54  *
55  * @author shanjian / emmons
56  */
57 @CLDRTool(alias = "ldml2json", description = "Convert CLDR data to JSON")
58 public class Ldml2JsonConverter {
59     private static boolean DEBUG = false;
60 
61     private enum RunType {
62         main, supplemental, segments, rbnf, annotations, annotationsDerived
63     }
64 
65     private static final StandardCodes sc = StandardCodes.make();
66     private Set<String> defaultContentLocales = SupplementalDataInfo.getInstance().getDefaultContentLocales();
67     private Set<String> skippedDefaultContentLocales = new TreeSet<>();
68 
69     private class availableLocales {
70         Set<String> modern = new TreeSet<>();
71         Set<String> full = new TreeSet<>();
72     }
73 
74     private availableLocales avl = new availableLocales();
75     private Gson gson = new GsonBuilder().setPrettyPrinting().create();
76     private static final Options options = new Options(
77         "Usage: LDML2JsonConverter [OPTIONS] [FILES]\n" +
78             "This program converts CLDR data to the JSON format.\n" +
79             "Please refer to the following options. \n" +
80             "\texample: org.unicode.cldr.json.Ldml2JsonConverter -c xxx -d yyy")
81                 .add("commondir", 'c', ".*", CLDRPaths.COMMON_DIRECTORY,
82                     "Common directory for CLDR files, defaults to CldrUtility.COMMON_DIRECTORY")
83                 .add("destdir", 'd', ".*", CLDRPaths.GEN_DIRECTORY,
84                     "Destination directory for output files, defaults to CldrUtility.GEN_DIRECTORY")
85                 .add("match", 'm', ".*", ".*",
86                     "Regular expression to define only specific locales or files to be generated")
87                 .add("type", 't', "(main|supplemental|segments|rbnf|annotations|annotationsDerived)", "main",
88                     "Type of CLDR data being generated, main, supplemental, or segments.")
89                 .add("resolved", 'r', "(true|false)", "false",
90                     "Whether the output JSON for the main directory should be based on resolved or unresolved data")
91                 .add("draftstatus", 's', "(approved|contributed|provisional|unconfirmed)", "unconfirmed",
92                     "The minimum draft status of the output data")
93                 .add("coverage", 'l', "(minimal|basic|moderate|modern|comprehensive|optional)", "optional",
94                     "The maximum coverage level of the output data")
95                 .add("fullnumbers", 'n', "(true|false)", "false",
96                     "Whether the output JSON should output data for all numbering systems, even those not used in the locale")
97                 .add("other", 'o', "(true|false)", "false",
98                     "Whether to write out the 'other' section, which contains any unmatched paths")
99                 .add("packages", 'p', "(true|false)", "false",
100                     "Whether to group data files into installable packages")
101                 .add("identity", 'i', "(true|false)", "true",
102                     "Whether to copy the identity info into all sections containing data")
103                 .add("konfig", 'k', ".*", null, "LDML to JSON configuration file");
104 
main(String[] args)105     public static void main(String[] args) throws Exception {
106         options.parse(args, true);
107 
108         Ldml2JsonConverter l2jc = new Ldml2JsonConverter(
109             options.get("commondir").getValue(),
110             options.get("destdir").getValue(),
111             options.get("type").getValue(),
112             Boolean.parseBoolean(options.get("fullnumbers").getValue()),
113             Boolean.parseBoolean(options.get("resolved").getValue()),
114             options.get("coverage").getValue(),
115             options.get("match").getValue(),
116             Boolean.parseBoolean(options.get("packages").getValue()),
117             options.get("konfig").getValue());
118 
119         long start = System.currentTimeMillis();
120         DraftStatus status = DraftStatus.valueOf(options.get("draftstatus").getValue());
121         l2jc.processDirectory(options.get("type").getValue(), status);
122         long end = System.currentTimeMillis();
123         System.out.println("Finished in " + (end - start) + " ms");
124     }
125 
126     // The CLDR file directory where those official XML files will be found.
127     private String cldrCommonDir;
128     // Where the generated JSON files will be stored.
129     private String outputDir;
130     // Whether data in main should output all numbering systems, even those not in use in the locale.
131     private boolean fullNumbers;
132     // Whether data in main should be resolved for output.
133     private boolean resolve;
134     // Used to match specific locales for output
135     private String match;
136     // Used to filter based on coverage
137     private int coverageValue;
138     // Whether we should write output files into installable packages
139     private boolean writePackages;
    // Type of run for this converter; one of the RunType constants.
141     private RunType type;
142 
143     private class JSONSection implements Comparable<JSONSection> {
144         public String section;
145         public Pattern pattern;
146         public String packageName;
147 
148         @Override
compareTo(JSONSection other)149         public int compareTo(JSONSection other) {
150             return section.compareTo(other.section);
151         }
152 
153     }
154 
155     private Map<String, String> dependencies;
156     private List<JSONSection> sections;
157     private Set<String> packages;
158 
Ldml2JsonConverter(String cldrDir, String outputDir, String runType, boolean fullNumbers, boolean resolve, String coverage, String match, boolean writePackages, String configFile)159     public Ldml2JsonConverter(String cldrDir, String outputDir, String runType, boolean fullNumbers, boolean resolve, String coverage, String match,
160         boolean writePackages, String configFile) {
161         this.cldrCommonDir = cldrDir;
162         this.outputDir = outputDir;
163         this.type = RunType.valueOf(runType);
164         this.fullNumbers = fullNumbers;
165         this.resolve = resolve;
166         this.match = match;
167         this.writePackages = writePackages;
168         this.coverageValue = Level.get(coverage).getLevel();
169 
170         sections = new ArrayList<>();
171         packages = new TreeSet<>();
172         dependencies = new HashMap<>();
173 
174         FileProcessor myReader = new FileProcessor() {
175             @Override
176             protected boolean handleLine(int lineCount, String line) {
177                 String[] lineParts = line.trim().split("\\s*;\\s*");
178                 String key, value, section = null, path = null, packageName = null, dependency = null;
179                 boolean hasSection = false;
180                 boolean hasPath = false;
181                 boolean hasPackage = false;
182                 boolean hasDependency = false;
183                 for (String linePart : lineParts) {
184                     int pos = linePart.indexOf('=');
185                     if (pos < 0) {
186                         throw new IllegalArgumentException();
187                     }
188                     key = linePart.substring(0, pos);
189                     value = linePart.substring(pos + 1);
190                     if (key.equals("section")) {
191                         hasSection = true;
192                         section = value;
193                     } else if (key.equals("path")) {
194                         hasPath = true;
195                         path = value;
196                     } else if (key.equals("package")) {
197                         hasPackage = true;
198                         packageName = value;
199                     } else if (key.equals("dependency")) {
200                         hasDependency = true;
201                         dependency = value;
202                     }
203                 }
204                 if (hasSection && hasPath) {
205                     JSONSection j = new JSONSection();
206                     j.section = section;
207                     j.pattern = PatternCache.get(path);
208                     if (hasPackage) {
209                         j.packageName = packageName;
210                     }
211                     sections.add(j);
212                 }
213                 if (hasDependency && hasPackage) {
214                     dependencies.put(packageName, dependency);
215                 }
216                 return true;
217             }
218         };
219 
220         if (configFile != null) {
221             myReader.process(configFile);
222         } else {
223             switch (type) {
224             case main:
225                 myReader.process(Ldml2JsonConverter.class, "JSON_config.txt");
226                 break;
227             case supplemental:
228                 myReader.process(Ldml2JsonConverter.class, "JSON_config_supplemental.txt");
229                 break;
230             case segments:
231                 myReader.process(Ldml2JsonConverter.class, "JSON_config_segments.txt");
232                 break;
233             case rbnf:
234                 myReader.process(Ldml2JsonConverter.class, "JSON_config_rbnf.txt");
235                 break;
236             default:
237                 myReader.process(Ldml2JsonConverter.class, "JSON_config_"+type.name()+".txt");
238             }
239         }
240 
241         // Add a section at the end of the list that will match anything not already matched.
242         JSONSection j = new JSONSection();
243         j.section = "other";
244         j.pattern = PatternCache.get(".*");
245         sections.add(j);
246 
247     }
248 
249     /**
250      * @see XPathParts#addInternal
251      */
252     static final Pattern ANNOTATION_CP_REMAP = PatternCache.get("^(.*)\\[@cp=\"(\\[|\\]|'|\"|@|/|=)\"\\](.*)$");
253     /**
254      * Transform the path by applying PATH_TRANSFORMATIONS rules.
255      *
256      * @param pathStr
257      *            The path string being transformed.
258      * @return The transformed path.
259      */
transformPath(final String pathStr, final String pathPrefix)260     private String transformPath(final String pathStr, final String pathPrefix) {
261         String result = pathStr;
262 
263         // handle annotation cp value
264         Matcher cpm = ANNOTATION_CP_REMAP.matcher(result);
265         if( cpm.matches() ) {
266             // We need to avoid breaking the syntax not just of JSON, but of XPATH.
267             final String badCodepointRange = cpm.group(2);
268             StringBuilder sb = new StringBuilder(cpm.group(1))
269                 .append("[@cp=\"");
270             // JSON would handle a wide range of things if escaped, but XPATH will not.
271             if(badCodepointRange.codePointCount(0, badCodepointRange.length()) != 1) {
272                 // forbid more than one U+ (because we will have to unescape it.)
273                 throw new IllegalArgumentException("Need exactly one codepoint in the @cp string, but got " + badCodepointRange + " in xpath " + pathStr);
274             }
275             badCodepointRange.codePoints().forEach(cp -> sb.append("U+").append(Integer.toHexString(cp).toUpperCase()));
276             sb.append("\"]").append(cpm.group(3));
277             result = sb.toString();
278         }
279 
280         if (DEBUG) {
281             System.out.println(" IN pathStr : " + result);
282         }
283         Matcher m;
284         for (int i = 0; i < LdmlConvertRules.PATH_TRANSFORMATIONS.length; i++) {
285             m = LdmlConvertRules.PATH_TRANSFORMATIONS[i].pattern.matcher(result);
286             if (m.matches()) {
287                 if (DEBUG) {
288                     System.out.println(LdmlConvertRules.PATH_TRANSFORMATIONS[i].pattern);
289                 }
290                 result = m.replaceFirst(LdmlConvertRules.PATH_TRANSFORMATIONS[i].replacement);
291                 break;
292             }
293         }
294         result = result.replaceFirst("/ldml/", pathPrefix);
295         result = result.replaceFirst("/supplementalData/", pathPrefix);
296 
297         if (result.contains("languages") ||
298             result.contains("languageAlias") ||
299             result.contains("languageMatches") ||
300             result.contains("likelySubtags") ||
301             result.contains("parentLocale") ||
302             result.contains("locales=")) {
303             result = result.replaceAll("_", "-");
304         }
305         if (DEBUG) {
306             System.out.println("OUT pathStr : " + result);
307         }
308 
309         if (DEBUG) {
310             System.out.println("result: " + result);
311         }
312         return result;
313     }
314 
mapPathsToSections(AtomicInteger readCount, int totalCount, CLDRFile file, String pathPrefix, SupplementalDataInfo sdi)315     private Map<JSONSection, List<CldrItem>> mapPathsToSections(AtomicInteger readCount, int totalCount,
316             CLDRFile file, String pathPrefix, SupplementalDataInfo sdi)
317         throws IOException, ParseException {
318         final Map<JSONSection, List<CldrItem>> sectionItems = new TreeMap<>();
319 
320         String locID = file.getLocaleID();
321         Matcher noNumberingSystemMatcher = LdmlConvertRules.NO_NUMBERING_SYSTEM_PATTERN.matcher("");
322         Matcher numberingSystemMatcher = LdmlConvertRules.NUMBERING_SYSTEM_PATTERN.matcher("");
323         Matcher rootIdentityMatcher = LdmlConvertRules.ROOT_IDENTITY_PATTERN.matcher("");
324         Set<String> activeNumberingSystems = new TreeSet<>();
325         activeNumberingSystems.add("latn"); // Always include latin script numbers
326         for (String np : LdmlConvertRules.ACTIVE_NUMBERING_SYSTEM_XPATHS) {
327             String ns = file.getWinningValue(np);
328             if (ns != null && ns.length() > 0) {
329                 activeNumberingSystems.add(ns);
330             }
331         }
332         DtdType fileDtdType;
333         if (CLDRFile.isSupplementalName(locID)) {
334             fileDtdType = DtdType.supplementalData;
335         } else {
336             fileDtdType = DtdType.ldml;
337         }
338         CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();
339         for (Iterator<String> it = file.iterator("", DtdData.getInstance(fileDtdType).getDtdComparator(null)); it.hasNext();) {
340             int cv = Level.UNDETERMINED.getLevel();
341             final String path = it.next();
342             String fullPath = file.getFullXPath(path);
343             String value = file.getWinningValue(path);
344             if (path.startsWith("//ldml/localeDisplayNames/languages") &&
345                 file.getSourceLocaleID(path, null).equals("code-fallback")) {
346                 value = file.getConstructedBaileyValue(path, null, null);
347             }
348 
349             if (fullPath == null) {
350                 fullPath = path;
351             }
352 
353             if (!CLDRFile.isSupplementalName(locID) && path.startsWith("//ldml/") && !path.contains("/identity")) {
354                 cv = covInfo.getCoverageValue(path, locID);
355             }
356             if (cv > coverageValue) {
357                 continue;
358             }
359             // Discard root identity element unless the locale is root
360             rootIdentityMatcher.reset(fullPath);
361             if (rootIdentityMatcher.matches() && !"root".equals(locID)) {
362                 continue;
363             }
364 
365             // automatically filter out number symbols and formats without a numbering system
366             noNumberingSystemMatcher.reset(fullPath);
367             if (noNumberingSystemMatcher.matches()) {
368                 continue;
369             }
370 
371             // Filter out non-active numbering systems data unless fullNumbers is specified.
372             numberingSystemMatcher.reset(fullPath);
373             if (numberingSystemMatcher.matches() && !fullNumbers) {
374                 XPathParts xpp = XPathParts.getFrozenInstance(fullPath);
375                 String currentNS = xpp.getAttributeValue(2, "numberSystem");
376                 if (currentNS != null && !activeNumberingSystems.contains(currentNS)) {
377                     continue;
378                 }
379             }
380 
381             // Handle the no inheritance marker.
382             if (resolve && CldrUtility.NO_INHERITANCE_MARKER.equals(value)) {
383                 continue;
384             }
385 
386             String transformedPath = transformPath(path, pathPrefix);
387             String transformedFullPath = transformPath(fullPath, pathPrefix);
388 
389             if(transformedPath.isEmpty()) {
390                 continue; // skip this path
391             }
392 
393             for (JSONSection js : sections) {
394                 if (js.pattern.matcher(transformedPath).matches()) {
395                     CldrItem item = new CldrItem(transformedPath, transformedFullPath, path, fullPath, value);
396 
397                     List<CldrItem> cldrItems = sectionItems.get(js);
398                     if (cldrItems == null) {
399                         cldrItems = new ArrayList<>();
400                     }
401                     cldrItems.add(item);
402                     sectionItems.put(js, cldrItems);
403                     break;
404                 }
405             }
406         }
407 
408         Matcher versionInfoMatcher = PatternCache.get(".*/(identity|version).*").matcher("");
409         // Automatically copy the version info to any sections that had real data in them.
410         JSONSection otherSection = sections.get(sections.size() - 1);
411         List<CldrItem> others = sectionItems.get(otherSection);
412         if (others == null) {
413             return sectionItems;
414         }
415         List<CldrItem> otherSectionItems = new ArrayList<>(others);
416         int addedItemCount = 0;
417         boolean copyIdentityInfo = Boolean.parseBoolean(options.get("identity").getValue());
418 
419         for (CldrItem item : otherSectionItems) {
420             String thisPath = item.getPath();
421             versionInfoMatcher.reset(thisPath);
422             if (versionInfoMatcher.matches()) {
423                 for (JSONSection js : sections) {
424                     if (sectionItems.get(js) != null && !js.section.equals("other") && copyIdentityInfo) {
425                         List<CldrItem> hit = sectionItems.get(js);
426                         hit.add(addedItemCount, item);
427                         sectionItems.put(js, hit);
428                     }
429                     if (js.section.equals("other")) {
430                         List<CldrItem> hit = sectionItems.get(js);
431                         hit.remove(item);
432                         sectionItems.put(js, hit);
433                     }
434                 }
435                 addedItemCount++;
436             }
437         }
438         return sectionItems;
439     }
440 
441     /**
442      * Convert CLDR's XML data to JSON format.
443      *
444      * @param file
445      *            CLDRFile object.
446      * @param outFilename
447      *            The file name used to save JSON data.
448      * @throws IOException
449      * @throws ParseException
450      */
convertCldrItems(AtomicInteger readCount, int totalCount, String dirName, String filename, String pathPrefix, final Map<JSONSection, List<CldrItem>> sectionItems)451     private void convertCldrItems(AtomicInteger readCount, int totalCount,
452             String dirName, String filename, String pathPrefix,
453             final Map<JSONSection, List<CldrItem>> sectionItems)
454         throws IOException, ParseException {
455         // zone and timezone items are queued for sorting first before they are
456         // processed.
457 
458         for (JSONSection js : sections) {
459             String outFilename;
460             if (type == RunType.rbnf) {
461                 outFilename = filename.replaceAll("_", "-") + ".json";
462             } else {
463                 outFilename = js.section + ".json";
464             }
465             String tier = "";
466             boolean writeOther = Boolean.parseBoolean(options.get("other").getValue());
467             if (js.section.equals("other") && !writeOther) {
468                 continue;
469             } else {
470                 StringBuilder outputDirname = new StringBuilder(outputDir);
471                 if (writePackages) {
472                     if (type != RunType.supplemental && type != RunType.rbnf) {
473                         LocaleIDParser lp = new LocaleIDParser();
474                         lp.set(filename);
475                         if (defaultContentLocales.contains(filename) &&
476                             lp.getRegion().length() > 0) {
477                             if (type == RunType.main) {
478                                 skippedDefaultContentLocales.add(filename.replaceAll("_", "-"));
479                             }
480                             continue;
481                         }
482                         Level localeCoverageLevel = sc.getLocaleCoverageLevel("Cldr", filename);
483                         if (localeCoverageLevel == Level.MODERN || filename.equals("root")) {
484                             tier = "-modern";
485                             if (type == RunType.main) {
486                                 avl.modern.add(filename.replaceAll("_", "-"));
487                             }
488                         } else {
489                             tier = "-full";
490                         }
491                         if (type == RunType.main) {
492                             avl.full.add(filename.replaceAll("_", "-"));
493                         }
494                     } else if (type == RunType.rbnf) {
495                         js.packageName = "rbnf";
496                         tier = "";
497                     }
498                     if (js.packageName != null) {
499                         String packageName = "cldr-" + js.packageName + tier;
500                         outputDirname.append("/" + packageName);
501                         packages.add(packageName);
502                     }
503                     outputDirname.append("/" + dirName + "/");
504                     if (type != RunType.supplemental && type != RunType.rbnf) {
505                         outputDirname.append(filename.replaceAll("_", "-"));
506                     }
507                     if (DEBUG) {
508                         System.out.println("outDir: " + outputDirname);
509                         System.out.println("pack: " + js.packageName);
510                         System.out.println("dir: " + dirName);
511                     }
512                 } else {
513                     outputDirname.append("/" + filename);
514                 }
515 
516                 File dir = new File(outputDirname.toString());
517                 if (!dir.exists()) {
518                     dir.mkdirs();
519                 }
520 
521                 List<String> outputDirs = new ArrayList<>();
522                 outputDirs.add(outputDirname.toString());
523                 if (writePackages && type == RunType.main && tier.equals("-modern")) {
524                     outputDirs.add(outputDirname.toString().replaceFirst("-modern", "-full"));
525                 }
526 
527                 for (String outputDir : outputDirs) {
528                     List<CldrItem> theItems = sectionItems.get(js);
529                     if (theItems == null || theItems.size() == 0) {
530                         System.out.println(">"+progressPrefix(readCount, totalCount) +
531                             outputDir + " - no items to write");
532                         continue;
533                     }
534                     System.out.println("?"+progressPrefix(readCount, totalCount) + outputDir + " - " + theItems.size() + " item(s) to write.");
535                     PrintWriter outf = FileUtilities.openUTF8Writer(outputDir, outFilename);
536                     JsonWriter out = new JsonWriter(outf);
537                     out.setIndent("  ");
538 
539                     ArrayList<CldrItem> sortingItems = new ArrayList<>();
540                     ArrayList<CldrItem> arrayItems = new ArrayList<>();
541 
542                     ArrayList<CldrNode> nodesForLastItem = new ArrayList<>();
543                     String lastLeadingArrayItemPath = null;
544                     String leadingArrayItemPath = "";
545                     int valueCount = 0;
546                     String previousIdentityPath = null;
547                     for (CldrItem item : theItems) {
548                         if(item.getPath().isEmpty()) {
549                             throw new IllegalArgumentException("empty xpath in " + filename + " section " + js.packageName+"/"+js.section);
550                         }
551                         if (type == RunType.rbnf) {
552                             item.setValue(item.getValue().replace('→', '>'));
553                             item.setValue(item.getValue().replace('←', '<'));
554                             if (item.getFullPath().contains("@value")) {
555                                 int indexStart = item.getFullPath().indexOf("@value") + 8;
556                                 int indexEnd = item.getFullPath().indexOf("]", indexStart) - 1;
557                                 if (indexStart >= 0 && indexEnd >= 0 && indexEnd > indexStart) {
558                                     String sub = item.getFullPath().substring(indexStart, indexEnd);
559                                     /* System.out.println("sub: " + sub);
560                                     System.out.println("full: " + item.getFullPath());
561                                     System.out.println("val: " + item.getValue());*/
562                                     item.setFullPath(item.getFullPath().replace(sub, item.getValue()));
563                                     item.setFullPath(item.getFullPath().replaceAll("@value", "@" + sub));
564                                     //System.out.println("modifyfull: " + item.getFullPath());
565                                     item.setValue("");
566                                 }
567                             }
568 
569                         }
570                         // ADJUST ACCESS=PRIVATE/PUBLIC BASED ON ICU RULE -- START
571                         if (type == RunType.rbnf) {
572                             String fullpath = item.getFullPath();
573                             if (fullpath.contains("/ruleset")) {
574                                 int ruleStartIndex = fullpath.indexOf("/ruleset[");
575                                 String checkString = fullpath.substring(ruleStartIndex);
576 
577                                 int ruleEndIndex = 0;
578                                 if (checkString.contains("/")) {
579                                     ruleEndIndex = fullpath.indexOf("/", ruleStartIndex + 1);
580                                 }
581                                 if (ruleEndIndex > ruleStartIndex) {
582                                     String oldRulePath = fullpath.substring(ruleStartIndex, ruleEndIndex);
583 
584                                     String newRulePath = oldRulePath;
585                                     if (newRulePath.contains("@type")) {
586                                         int typeIndexStart = newRulePath.indexOf("\"", newRulePath.indexOf("@type"));
587                                         int typeIndexEnd = newRulePath.indexOf("\"", typeIndexStart + 1);
588                                         String type = newRulePath.substring(typeIndexStart + 1, typeIndexEnd);
589 
590                                         String newType = "";
591                                         if (newRulePath.contains("@access")) {
592                                             newType = "%%" + type;
593                                         } else {
594                                             newType = "%" + type;
595                                         }
596                                         newRulePath = newRulePath.replace(type, newType);
597                                         item.setPath(item.getPath().replace(type, newType));
598                                     }
599                                     fullpath = fullpath.replace(oldRulePath, newRulePath);
600                                     item.setFullPath(fullpath);
601 
602                                 }
603                             }
604                         }
605                         // ADJUST ACCESS=PRIVATE/PUBLIC BASED ON ICU RULE -- END
606 
607                         // items in the identity section of a file should only ever contain the lowest level, even if using
608                         // resolving source, so if we have duplicates ( caused by attributes used as a value ) then suppress
609                         // them here.
610                         if (item.getPath().contains("/identity/")) {
611                             String[] parts = item.getPath().split("\\[");
612                             if (parts[0].equals(previousIdentityPath)) {
613                                 continue;
614                             } else {
615                                 XPathParts xpp = XPathParts.getFrozenInstance(item.getPath());
616                                 String territory = xpp.findAttributeValue("territory", "type");
617                                 LocaleIDParser lp = new LocaleIDParser().set(filename);
618                                 if (territory != null && territory.length() > 0 && !territory.equals(lp.getRegion())) {
619                                     continue;
620                                 }
621                                 previousIdentityPath = parts[0];
622                             }
623                         }
624 
625                         // some items need to be split to multiple item before processing. None
626                         // of those items need to be sorted.
627                         CldrItem[] items = item.split();
628                         if (items == null) {
629                             items = new CldrItem[1];
630                             items[0] = item;
631                         }
632                         valueCount += items.length;
633 
634                         for (CldrItem newItem : items) {
635                             // alias will be dropped in conversion, don't count it.
636                             if (newItem.isAliasItem()) {
637                                 valueCount--;
638                             }
639 
640                             // Items like zone items need to be sorted first before write them out.
641                             if (newItem.needsSort()) {
642                                 resolveArrayItems(out, nodesForLastItem, arrayItems);
643                                 sortingItems.add(newItem);
644                             } else {
645                                 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(
646                                     newItem.getPath());
647                                 if (matcher.matches()) {
648                                     resolveSortingItems(out, nodesForLastItem, sortingItems);
649                                     leadingArrayItemPath = matcher.group(1);
650                                     if (lastLeadingArrayItemPath != null &&
651                                         !lastLeadingArrayItemPath.equals(leadingArrayItemPath)) {
652                                         resolveArrayItems(out, nodesForLastItem, arrayItems);
653                                     }
654                                     lastLeadingArrayItemPath = leadingArrayItemPath;
655                                     arrayItems.add(newItem);
656                                 } else {
657                                     resolveSortingItems(out, nodesForLastItem, sortingItems);
658                                     resolveArrayItems(out, nodesForLastItem, arrayItems);
659                                     outputCldrItem(out, nodesForLastItem, newItem);
660                                     lastLeadingArrayItemPath = "";
661                                 }
662                             }
663                         }
664                     }
665 
666                     resolveSortingItems(out, nodesForLastItem, sortingItems);
667                     resolveArrayItems(out, nodesForLastItem, arrayItems);
668                     System.out.println(">"+progressPrefix(readCount, totalCount) + String.format(".../%s/%s\t= %d values",
669                         dir.getPath().substring(this.outputDir.length()+1), outFilename, valueCount));
670                     closeNodes(out, nodesForLastItem.size() - 2, 0);
671                     outf.println();
672                     out.close();
673                 }
674             }
675         }
676     }
677 
    /**
     * Creates the packaging files ( i.e. package.json and bower.json ) for a particular package
     * by calling {@code writePackageJson} followed by {@code writeBowerJson}.
     *
     * @param outputDir
     *            The output directory handed unchanged to both writers.
     * @param packageName
     *            The name of the installable package
     * @throws IOException
     *             Propagated from either writer.
     */
    public void writePackagingFiles(String outputDir, String packageName) throws IOException {
        writePackageJson(outputDir, packageName);
        writeBowerJson(outputDir, packageName);
    }
688 
writeBasicInfo(JsonObject obj, String packageName, boolean isNPM)689     public void writeBasicInfo(JsonObject obj, String packageName, boolean isNPM) {
690 
691         obj.addProperty("name", packageName);
692         String versionString = CLDRFile.GEN_VERSION;
693         while (versionString.split("\\.").length < 3) {
694             versionString = versionString + ".0";
695         }
696         obj.addProperty("version", versionString);
697 
698         String[] packageNameParts = packageName.split("-");
699         String dependency = dependencies.get(packageNameParts[1]);
700         if (dependency != null) {
701             String[] dependentPackageNames = new String[1];
702             String tier = packageNameParts[packageNameParts.length - 1];
703             if (dependency.equals("core")) {
704                 dependentPackageNames[0] = "cldr-core";
705             } else {
706                 dependentPackageNames[0] = "cldr-" + dependency + "-" + tier;
707             }
708 
709             JsonObject dependencies = new JsonObject();
710             for (String dependentPackageName : dependentPackageNames) {
711                 if (dependentPackageName != null) {
712                     dependencies.addProperty(dependentPackageName, versionString);
713                 }
714             }
715             obj.add(isNPM ? "peerDependencies" : "dependencies", dependencies);
716         }
717     }
718 
writePackageJson(String outputDir, String packageName)719     public void writePackageJson(String outputDir, String packageName) throws IOException {
720         PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "package.json");
721         System.out.println("Creating packaging file => " + outputDir + File.separator + packageName + File.separator + "package.json");
722         JsonObject obj = new JsonObject();
723         writeBasicInfo(obj, packageName, true);
724 
725         JsonArray maintainers = new JsonArray();
726         JsonObject primaryMaintainer = new JsonObject();
727 
728         obj.addProperty("homepage", "http://cldr.unicode.org");
729         obj.addProperty("author", "The Unicode Consortium");
730 
731         primaryMaintainer.addProperty("name", "John Emmons");
732         primaryMaintainer.addProperty("email", "emmo@us.ibm.com");
733         primaryMaintainer.addProperty("url", "https://github.com/JCEmmons");
734         maintainers.add(primaryMaintainer);
735         obj.add("maintainers", maintainers);
736 
737         JsonObject repository = new JsonObject();
738         repository.addProperty("type", "git");
739         repository.addProperty("url", "git://github.com/unicode-cldr/" + packageName + ".git");
740         obj.add("repository", repository);
741 
742         obj.addProperty("license", "Unicode-DFS-2016");
743 
744         obj.addProperty("bugs", "https://unicode-org.atlassian.net/projects/CLDR/issues");
745 
746         outf.println(gson.toJson(obj));
747         outf.close();
748     }
749 
writeBowerJson(String outputDir, String packageName)750     public void writeBowerJson(String outputDir, String packageName) throws IOException {
751         PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "bower.json");
752         System.out.println("Creating packaging file => " + outputDir + File.separator + packageName + File.separator + "bower.json");
753         JsonObject obj = new JsonObject();
754         writeBasicInfo(obj, packageName, false);
755         if (type == RunType.supplemental) {
756             JsonArray mainPaths = new JsonArray();
757             mainPaths.add(new JsonPrimitive("availableLocales.json"));
758             mainPaths.add(new JsonPrimitive("defaultContent.json"));
759             mainPaths.add(new JsonPrimitive("scriptMetadata.json"));
760             mainPaths.add(new JsonPrimitive(type.toString() + "/*.json"));
761             obj.add("main", mainPaths);
762         } else if (type == RunType.rbnf) {
763             obj.addProperty("main", type.toString() + "/*.json");
764         } else {
765             obj.addProperty("main", type.toString() + "/**/*.json");
766         }
767 
768         JsonArray ignorePaths = new JsonArray();
769         ignorePaths.add(new JsonPrimitive(".gitattributes"));
770         ignorePaths.add(new JsonPrimitive("README.md"));
771         obj.add("ignore", ignorePaths);
772 
773         outf.println(gson.toJson(obj));
774         outf.close();
775     }
776 
writeDefaultContent(String outputDir)777     public void writeDefaultContent(String outputDir) throws IOException {
778         PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "defaultContent.json");
779         System.out.println("Creating packaging file => " + outputDir + "cldr-core" + File.separator + "defaultContent.json");
780         JsonObject obj = new JsonObject();
781         obj.add("defaultContent", gson.toJsonTree(skippedDefaultContentLocales));
782         outf.println(gson.toJson(obj));
783         outf.close();
784     }
785 
writeAvailableLocales(String outputDir)786     public void writeAvailableLocales(String outputDir) throws IOException {
787         PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "availableLocales.json");
788         System.out.println("Creating packaging file => " + outputDir + "cldr-core" + File.separator + "availableLocales.json");
789         JsonObject obj = new JsonObject();
790         obj.add("availableLocales", gson.toJsonTree(avl));
791         outf.println(gson.toJson(obj));
792         outf.close();
793     }
794 
writeScriptMetadata(String outputDir)795     public void writeScriptMetadata(String outputDir) throws IOException {
796         PrintWriter outf = FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "scriptMetadata.json");
797         System.out.println("Creating script metadata file => " + outputDir + File.separator + "cldr-core" + File.separator + "scriptMetadata.json");
798         Map<String, Info> scriptInfo = new TreeMap<>();
799         for (String script : ScriptMetadata.getScripts()) {
800             Info i = ScriptMetadata.getInfo(script);
801             scriptInfo.put(script, i);
802         }
803         if (ScriptMetadata.errors.size() > 0) {
804             System.err.println(Joiner.on("\n\t").join(ScriptMetadata.errors));
805             //throw new IllegalArgumentException();
806         }
807 
808         JsonObject obj = new JsonObject();
809         obj.add("scriptMetadata", gson.toJsonTree(scriptInfo));
810         outf.println(gson.toJson(obj));
811         outf.close();
812     }
813 
814     /**
815      * Process the pending sorting items.
816      *
817      * @param out
818      *            The ArrayList to hold all output lines.
819      * @param nodesForLastItem
820      *            All the nodes from last item.
821      * @param sortingItems
822      *            The item list that should be sorted before output.
823      * @throws IOException
824      * @throws ParseException
825      */
resolveSortingItems(JsonWriter out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems)826     private void resolveSortingItems(JsonWriter out,
827         ArrayList<CldrNode> nodesForLastItem,
828         ArrayList<CldrItem> sortingItems)
829         throws IOException, ParseException {
830         ArrayList<CldrItem> arrayItems = new ArrayList<>();
831         String lastLeadingArrayItemPath = null;
832 
833         if (!sortingItems.isEmpty()) {
834             Collections.sort(sortingItems);
835             for (CldrItem item : sortingItems) {
836                 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(
837                     item.getPath());
838                 if (matcher.matches()) {
839                     String leadingArrayItemPath = matcher.group(1);
840                     if (lastLeadingArrayItemPath != null &&
841                         !lastLeadingArrayItemPath.equals(leadingArrayItemPath)) {
842                         resolveArrayItems(out, nodesForLastItem, arrayItems);
843                     }
844                     lastLeadingArrayItemPath = leadingArrayItemPath;
845                     arrayItems.add(item);
846                 } else {
847                     outputCldrItem(out, nodesForLastItem, item);
848                 }
849             }
850             sortingItems.clear();
851             resolveArrayItems(out, nodesForLastItem, arrayItems);
852         }
853     }
854 
855     /**
856      * Process the pending array items.
857      *
858      * @param out
859      *            The ArrayList to hold all output lines.
860      * @param nodesForLastItem
861      *            All the nodes from last item.
862      * @param arrayItems
863      *            The item list that should be output as array.
864      * @throws IOException
865      * @throws ParseException
866      */
resolveArrayItems(JsonWriter out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems)867     private void resolveArrayItems(JsonWriter out,
868         ArrayList<CldrNode> nodesForLastItem,
869         ArrayList<CldrItem> arrayItems)
870         throws IOException, ParseException {
871         boolean rbnfFlag = false;
872         if (!arrayItems.isEmpty()) {
873             CldrItem firstItem = arrayItems.get(0);
874             if (firstItem.needsSort()) {
875                 Collections.sort(arrayItems);
876                 firstItem = arrayItems.get(0);
877             }
878 
879             int arrayLevel = getArrayIndentLevel(firstItem);
880 
881             outputStartArray(out, nodesForLastItem, firstItem, arrayLevel);
882 
883             // Previous statement closed for first element, trim nodesForLastItem
884             // so that it will not happen again inside.
885             int len = nodesForLastItem.size();
886             while (len > arrayLevel) {
887                 nodesForLastItem.remove(len - 1);
888                 len--;
889             }
890             if (arrayItems.get(0).getFullPath().contains("rbnfrule")) {
891                 rbnfFlag = true;
892                 out.beginObject();
893             }
894             for (CldrItem insideItem : arrayItems) {
895 
896                 outputArrayItem(out, insideItem, nodesForLastItem, arrayLevel);
897 
898             }
899             if (rbnfFlag) {
900                 out.endObject();
901             }
902 
903             arrayItems.clear();
904 
905             int lastLevel = nodesForLastItem.size() - 1;
906             closeNodes(out, lastLevel, arrayLevel);
907             if (!rbnfFlag) {
908                 out.endArray();
909             }
910             for (int i = arrayLevel - 1; i < lastLevel; i++) {
911                 nodesForLastItem.remove(i);
912             }
913         }
914     }
915 
916     /**
917      * Find the indent level on which array should be inserted.
918      *
919      * @param item
920      *            The CldrItem being examined.
921      * @return The array indent level.
922      * @throws ParseException
923      */
getArrayIndentLevel(CldrItem item)924     private int getArrayIndentLevel(CldrItem item) throws ParseException {
925         Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(
926             item.getPath());
927         if (!matcher.matches()) {
928             System.out.println("No match found for " + item.getPath() + ", this shouldn't happen.");
929             return 0;
930         }
931 
932         String leadingPath = matcher.group(1);
933         CldrItem fakeItem = new CldrItem(leadingPath, leadingPath, leadingPath, leadingPath, "");
934         return fakeItem.getNodesInPath().size() - 1;
935     }
936 
937     /**
938      * Write the start of an array.
939      *
940      * @param out
941      *            The ArrayList to hold all output lines.
942      * @param nodesForLastItem
943      *            Nodes in path for last CldrItem.
944      * @param item
945      *            The CldrItem to be processed.
946      * @param arrayLevel
947      *            The level on which array is laid out.
948      * @throws IOException
949      * @throws ParseException
950      */
outputStartArray(JsonWriter out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)951     private void outputStartArray(JsonWriter out,
952         ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)
953         throws IOException, ParseException {
954 
955         ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
956 
957         int i = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath);
958 
959         // close previous nodes
960         closeNodes(out, nodesForLastItem.size() - 2, i);
961 
962         for (; i < arrayLevel - 1; i++) {
963             startNonleafNode(out, nodesInPath.get(i), i);
964         }
965 
966         String objName = nodesInPath.get(i).getNodeKeyName();
967         out.name(objName);
968         if (!item.getFullPath().contains("rbnfrule")) {
969             out.beginArray();
970         }
971     }
972 
973     /**
974      * Write a CLDR item to file.
975      *
976      * "usesMetazone" will be checked to see if it is current. Those non-current
977      * item will be dropped.
978      *
979      * @param out
980      *            The ArrayList to hold all output lines.
981      * @param nodesForLastItem
982      * @param item
983      *            The CldrItem to be processed.
984      * @throws IOException
985      * @throws ParseException
986      */
outputCldrItem(JsonWriter out, ArrayList<CldrNode> nodesForLastItem, CldrItem item)987     private void outputCldrItem(JsonWriter out,
988         ArrayList<CldrNode> nodesForLastItem, CldrItem item)
989         throws IOException, ParseException {
990         // alias has been resolved, no need to keep it.
991         if (item.isAliasItem()) {
992             return;
993         }
994 
995         ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
996         int arraySize = nodesInPath.size();
997 
998         int i = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath);
999         if (i == nodesInPath.size() && type != RunType.rbnf) {
1000             System.err.println("This nodes and last nodes has identical path. ("
1001                 + item.getPath() + ") Some distinguishing attributes wrongly removed?");
1002             return;
1003         }
1004 
1005         // close previous nodes
1006         closeNodes(out, nodesForLastItem.size() - 2, i);
1007 
1008         for (; i < nodesInPath.size() - 1; ++i) {
1009             startNonleafNode(out, nodesInPath.get(i), i);
1010         }
1011 
1012         writeLeafNode(out, nodesInPath.get(i), item.getValue(), i);
1013         nodesForLastItem.clear();
1014         nodesForLastItem.addAll(nodesInPath);
1015     }
1016 
1017     /**
1018      * Close nodes that no longer appears in path.
1019      *
1020      * @param out
1021      *            The JsonWriter to hold all output lines.
1022      * @param last
1023      *            The last node index in previous item.
1024      * @param firstDiff
1025      *            The first different node in next item.
1026      * @throws IOException
1027      */
closeNodes(JsonWriter out, int last, int firstDiff)1028     private void closeNodes(JsonWriter out, int last, int firstDiff)
1029         throws IOException {
1030         for (int i = last; i >= firstDiff; --i) {
1031             if (i == 0) {
1032                 out.endObject();
1033                 break;
1034             }
1035             out.endObject();
1036         }
1037     }
1038 
1039     /**
1040      * Start a non-leaf node, write out its attributes.
1041      *
1042      * @param out
1043      *            The ArrayList to hold all output lines.
1044      * @param node
1045      *            The node being written.
1046      * @param level
1047      *            indentation level.
1048      * @throws IOException
1049      */
startNonleafNode(JsonWriter out, CldrNode node, int level)1050     private void startNonleafNode(JsonWriter out, CldrNode node, int level)
1051         throws IOException {
1052         String objName = node.getNodeKeyName();
1053         // Some node should be skipped as indicated by objName being null.
1054         if (objName == null) {
1055             return;
1056         }
1057 
1058         // first level needs no key, it is the container.
1059         if (level == 0) {
1060             out.beginObject();
1061             return;
1062         }
1063 
1064         Map<String, String> attrAsValueMap = node.getAttrAsValueMap();
1065 
1066         if( type == RunType.annotations || type == RunType.annotationsDerived ) {
1067             if(objName.startsWith("U+")) {
1068                 // parse U+22 -> "   etc
1069                 out.name(com.ibm.icu.text.UTF16.valueOf(Integer.parseInt(objName.substring(2), 16)));
1070             } else {
1071                 out.name(objName);
1072             }
1073         } else {
1074             out.name(objName);
1075         }
1076 
1077         out.beginObject();
1078         for (String key : attrAsValueMap.keySet()) {
1079             String value = escapeValue(attrAsValueMap.get(key));
1080             // attribute is prefixed with "_" when being used as key.
1081             out.name("_" + key).value(value);
1082         }
1083     }
1084 
1085     /**
1086      * Write a CLDR item to file.
1087      *
1088      * "usesMetazone" will be checked to see if it is current. Those non-current
1089      * item will be dropped.
1090      *
1091      * @param out
1092      *            The ArrayList to hold all output lines.
1093      * @param item
1094      *            The CldrItem to be processed.
1095      * @param nodesForLastItem
1096      *            Nodes in path for last item.
1097      * @param arrayLevel
1098      *            The indentation level in which array exists.
1099      * @throws IOException
1100      * @throws ParseException
1101      */
outputArrayItem(JsonWriter out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel)1102     private void outputArrayItem(JsonWriter out, CldrItem item,
1103         ArrayList<CldrNode> nodesForLastItem, int arrayLevel)
1104         throws IOException, ParseException {
1105 
1106         // This method is more complicated that outputCldrItem because it needs to
1107         // handle 3 different cases.
1108         // 1. When difference is found below array item, this item will be of the
1109         // same array item. Inside the array item, it is about the same as
1110         // outputCldrItem, just with one more level of indentation because of
1111         // the array.
1112         // 2. The array item is the leaf item with no attribute, simplify it as
1113         // an object with one name/value pair.
1114         // 3. The array item is the leaf item with attribute, an embedded object
1115         // will be created inside the array item object.
1116 
1117         ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
1118         String value = escapeValue(item.getValue());
1119         int nodesNum = nodesInPath.size();
1120 
1121         // case 1
1122         int diff = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath);
1123         if (diff > arrayLevel) {
1124             // close previous nodes
1125             closeNodes(out, nodesForLastItem.size() - 1, diff + 1);
1126 
1127             for (int i = diff; i < nodesNum - 1; i++) {
1128                 startNonleafNode(out, nodesInPath.get(i), i + 1);
1129             }
1130             writeLeafNode(out, nodesInPath.get(nodesNum - 1), value, nodesNum);
1131             return;
1132         }
1133 
1134         if (arrayLevel == nodesNum - 1) {
1135             // case 2
1136             // close previous nodes
1137             if (nodesForLastItem.size() - 1 - arrayLevel > 0) {
1138                 closeNodes(out, nodesForLastItem.size() - 1, arrayLevel);
1139             }
1140 
1141             String objName = nodesInPath.get(nodesNum - 1).getNodeKeyName();
1142             int pos = objName.indexOf('-');
1143             if (pos > 0) {
1144                 objName = objName.substring(0, pos);
1145             }
1146 
1147             Map<String, String> attrAsValueMap = nodesInPath.get(nodesNum - 1).getAttrAsValueMap();
1148 
1149             // ADJUST RADIX BASED ON ICU RULE -- BEGIN
1150             if (attrAsValueMap.containsKey("radix")) {
1151                 String radixValue = attrAsValueMap.get("radix");
1152                 attrAsValueMap.remove("radix");
1153                 for (Map.Entry<String, String> attributes : attrAsValueMap.entrySet()) {
1154                     String oldKey = attributes.getKey();
1155                     String newValue = attributes.getValue();
1156                     String newKey = oldKey + "/" + radixValue;
1157                     attrAsValueMap.remove(oldKey);
1158                     attrAsValueMap.put(newKey, newValue);
1159 
1160                 }
1161             }
1162             // ADJUST RADIX BASED ON ICU RULE -- END
1163 
1164             if (attrAsValueMap.isEmpty()) {
1165                 out.beginObject();
1166                 out.name(objName).value(value);
1167                 out.endObject();
1168             } else {
1169                 if (!objName.equals("rbnfrule")) {
1170                     out.beginObject();
1171                 }
1172                 writeLeafNode(out, objName, attrAsValueMap, value, nodesNum);
1173                 if (!objName.equals("rbnfrule")) {
1174                     out.endObject();
1175                 }
1176 
1177             }
1178             // the last node is closed, remove it.
1179             nodesInPath.remove(nodesNum - 1);
1180         } else {
1181             // case 3
1182             // close previous nodes
1183             if (nodesForLastItem.size() - 1 - (arrayLevel) > 0) {
1184                 closeNodes(out, nodesForLastItem.size() - 1, arrayLevel);
1185             }
1186 
1187             out.beginObject();
1188 
1189             CldrNode node = nodesInPath.get(arrayLevel);
1190             String objName = node.getNodeKeyName();
1191             int pos = objName.indexOf('-');
1192             if (pos > 0) {
1193                 objName = objName.substring(0, pos);
1194             }
1195             Map<String, String> attrAsValueMap = node.getAttrAsValueMap();
1196             out.name(objName);
1197             out.beginObject();
1198             for (String key : attrAsValueMap.keySet()) {
1199                 // attribute is prefixed with "_" when being used as key.
1200                 out.name("_" + key).value(escapeValue(attrAsValueMap.get(key)));
1201             }
1202 
1203             for (int i = arrayLevel + 1; i < nodesInPath.size() - 1; i++) {
1204                 startNonleafNode(out, nodesInPath.get(i), i + 1);
1205             }
1206             writeLeafNode(out, nodesInPath.get(nodesNum - 1), value, nodesNum);
1207         }
1208 
1209         nodesForLastItem.clear();
1210         nodesForLastItem.addAll(nodesInPath);
1211     }
1212 
1213     /**
1214      * Compare two nodes list, find first index that the two list have different
1215      * nodes and return it.
1216      *
1217      * @param nodesForLastItem
1218      *            Nodes from last item.
1219      * @param nodesInPath
1220      *            Nodes for current item.
1221      * @return The index of first different node.
1222      */
findFirstDiffNodeIndex(ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrNode> nodesInPath)1223     private int findFirstDiffNodeIndex(ArrayList<CldrNode> nodesForLastItem,
1224         ArrayList<CldrNode> nodesInPath) {
1225         int i;
1226         for (i = 0; i < nodesInPath.size(); ++i) {
1227             if (i >= nodesForLastItem.size() ||
1228                 !nodesInPath.get(i).getNodeDistinguishingName().equals(
1229                     nodesForLastItem.get(i).getNodeDistinguishingName())) {
1230                 break;
1231             }
1232         }
1233         return i;
1234     }
1235 
    /**
     * Formats a progress prefix from the current value of a shared counter.
     *
     * @param readCount
     *            Counter whose current value is read with {@code get()}; it is
     *            not modified.
     * @param totalCount
     *            The total shown after the slash.
     * @return The prefix produced by the {@code int} overload.
     */
    private final String progressPrefix(AtomicInteger readCount, int totalCount) {
        return progressPrefix(readCount.get(), totalCount);
    }
1239 
    /**
     * Formats a progress prefix of the form {@code [readCount/totalCount]:}
     * followed by a tab.
     *
     * @param readCount
     *            The number shown before the slash.
     * @param totalCount
     *            The number shown after the slash.
     * @return The formatted prefix.
     */
    private final String progressPrefix(int readCount, int totalCount) {
        return String.format("[%d/%d]:\t", readCount, totalCount);
    }
1243 
1244     /**
1245      * Process files in a directory of CLDR file tree.
1246      *
1247      * @param dirName
1248      *            The directory in which xml file will be transformed.
1249      * @param minimalDraftStatus
1250      *            The minimumDraftStatus that will be accepted.
1251      * @throws IOException
1252      * @throws ParseException
1253      */
processDirectory(String dirName, DraftStatus minimalDraftStatus)1254     public void processDirectory(String dirName, DraftStatus minimalDraftStatus)
1255         throws IOException, ParseException {
1256         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(cldrCommonDir + "supplemental");
1257         Factory cldrFactory = Factory.make(
1258             cldrCommonDir + dirName + "/", ".*");
1259         Set<String> files = cldrFactory.getAvailable();
1260         final int total = files.size();
1261         AtomicInteger readCount = new AtomicInteger(0);
1262         Map<String, Throwable> errs = new TreeMap<>();
1263         files
1264             .parallelStream()
1265             .unordered()
1266             .forEach(filename -> {
1267 
1268                 if (LdmlConvertRules.IGNORE_FILE_SET.contains(filename)) {
1269                     return;
1270                 }
1271                 if (!filename.matches(match)) {
1272                     return;
1273                 }
1274 
1275                 String pathPrefix;
1276                 CLDRFile file = cldrFactory.make(filename, resolve && type == RunType.main, minimalDraftStatus);
1277                 // Print 'reading' after the make, to stagger the output a little bit.
1278                 // Otherwise, the printout happens before any work happens, and is easily out of order.
1279                 System.out.println("<"+progressPrefix(readCount.incrementAndGet(), total) +
1280                     "Reading " + dirName + "/" + filename);
1281 
1282                 if (type == RunType.main) {
1283                     pathPrefix = "/cldr/" + dirName + "/" + filename.replaceAll("_", "-") + "/";
1284                 } else {
1285                     pathPrefix = "/cldr/" + dirName + "/";
1286                 }
1287 
1288                 try {
1289                     convertCldrItems(readCount, total, dirName, filename, pathPrefix,
1290                         mapPathsToSections(readCount, total, file, pathPrefix, sdi));
1291                 } catch(IOException | ParseException t) {
1292                     t.printStackTrace();
1293                     System.err.println("!"+progressPrefix(readCount.incrementAndGet(), total)+filename + " - err - " + t);
1294                     errs.put(filename, t);
1295                 } finally {
1296                     System.out.println("."+progressPrefix(readCount, total) +
1297                         "Completing " + dirName + "/" + filename);
1298                 }
1299             });
1300 
1301         if(!errs.isEmpty()) {
1302             System.err.println("Errors in these files:");
1303             for(Map.Entry<String,Throwable> e : errs.entrySet()) {
1304                 System.err.println(e.getKey() + " - " + e.getValue());
1305             }
1306             // rethrow
1307             for(Map.Entry<String,Throwable> e : errs.entrySet()) {
1308                 if(e.getValue() instanceof IOException ) {
1309                     throw (IOException)e.getValue(); // throw the first one
1310                 } else if(e.getValue() instanceof ParseException ) {
1311                     throw (ParseException)e.getValue(); // throw the first one
1312                 } else {
1313                     throw new RuntimeException("Other exception thrown: "  + e.getValue());
1314                 }
1315                 /* NOTREACHED */
1316             }
1317         }
1318 
1319         if (writePackages) {
1320             for (String currentPackage : packages) {
1321                 writePackagingFiles(outputDir, currentPackage);
1322             }
1323             if (type == RunType.main) {
1324                 writeDefaultContent(outputDir);
1325                 writeAvailableLocales(outputDir);
1326             } else if (type == RunType.supplemental) {
1327                 writeScriptMetadata(outputDir);
1328             }
1329 
1330         }
1331     }
1332 
1333     /**
1334      * Replacement pattern for escaping.
1335      */
1336     private static final Pattern escapePattern = PatternCache.get("\\\\(?!u)");
1337 
1338     /**
1339      * Escape \ and " in value string.
1340      * \ should be replaced by \\, except in case of \u1234
1341      * " should be replaced by \"
1342      * In following code, \\\\ represent one \, because java compiler and
1343      * regular expression compiler each do one round of escape.
1344      *
1345      * @param value
1346      *            Input string.
1347      * @return escaped string.
1348      */
escapeValue(String value)1349     private String escapeValue(String value) {
1350         Matcher match = escapePattern.matcher(value);
1351         String ret = match.replaceAll("\\\\\\\\");
1352         return ret.replace("\"", "\\\"").replace("\n", " ").replace("\t", " ");
1353     }
1354 
1355     /**
1356      * Write the value to output.
1357      *
1358      * @param out
1359      *            The ArrayList to hold all output lines.
1360      * @param node
1361      *            The CldrNode being written.
1362      * @param value
1363      *            The value part for this element.
1364      * @param level
1365      *            Indent level.
1366      * @throws IOException
1367      */
writeLeafNode(JsonWriter out, CldrNode node, String value, int level)1368     private void writeLeafNode(JsonWriter out, CldrNode node, String value,
1369         int level) throws IOException {
1370 
1371         String objName = node.getNodeKeyName();
1372         Map<String, String> attrAsValueMaps = node.getAttrAsValueMap();
1373         writeLeafNode(out, objName, attrAsValueMaps, value, level);
1374     }
1375 
1376     /**
1377      * Write the value to output.
1378      *
1379      * @param out
1380      *            The ArrayList to hold all output lines.
1381      * @param objName
1382      *            The node's node.
1383      * @param attrAsValueMap
1384      *            Those attributes that will be treated as values.
1385      * @param value
1386      *            The value part for this element.
1387      * @param level
1388      *            Indent level.
1389      * @throws IOException
1390      */
writeLeafNode(JsonWriter out, String objName, Map<String, String> attrAsValueMap, String value, int level)1391     private void writeLeafNode(JsonWriter out, String objName,
1392         Map<String, String> attrAsValueMap, String value, int level)
1393         throws IOException {
1394         if (objName == null) {
1395             return;
1396         }
1397         value = escapeValue(value);
1398 
1399         if (attrAsValueMap.isEmpty()) {
1400             out.name(objName);
1401             if (value.isEmpty()) {
1402                 out.beginObject();
1403                 out.endObject();
1404             } else if (type == RunType.annotations ||
1405                 type == RunType.annotationsDerived) {
1406                 out.beginArray();
1407                 // split this, so "a | b | c" becomes ["a","b","c"]
1408                 for (final String s : Annotations.splitter.split(value.trim())) {
1409                     out.value(s);
1410                 }
1411                 out.endArray();
1412             } else {
1413                 // normal value
1414                 out.value(value);
1415             }
1416             return;
1417         }
1418 
1419         // If there is no value, but a attribute being treated as value,
1420         // simplify the output.
1421         if (value.isEmpty() &&
1422             attrAsValueMap.containsKey(LdmlConvertRules.ANONYMOUS_KEY)) {
1423             out.name(objName).value(attrAsValueMap.get(LdmlConvertRules.ANONYMOUS_KEY));
1424             return;
1425         }
1426         if (!objName.equals("rbnfrule")) {
1427             out.name(objName);
1428             out.beginObject();
1429         }
1430 
1431         if (!value.isEmpty()) {
1432             out.name("_value").value(value);
1433         }
1434 
1435         for (String key : attrAsValueMap.keySet()) {
1436             String attrValue = escapeValue(attrAsValueMap.get(key));
1437             // attribute is prefixed with "_" when being used as key.
1438             if (LdmlConvertRules.ATTRVALUE_AS_ARRAY_SET.contains(key)) {
1439                 String[] strings = attrValue.trim().split("\\s+");
1440                 if (type != RunType.rbnf) {
1441                     out.name("_" + key);
1442                 } else {
1443                     out.name(key);
1444                 }
1445                 out.beginArray();
1446                 for (String s : strings) {
1447                     out.value(s);
1448                 }
1449                 out.endArray();
1450             } else if (type != RunType.rbnf) {
1451                     out.name("_" + key).value(attrValue);
1452             } else {
1453                 out.name(key).value(attrValue);
1454             }
1455         }
1456         if (!objName.equals("rbnfrule")) {
1457             out.endObject();
1458         }
1459     }
1460 }
1461