• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.HashMultimap;
4 import com.google.common.collect.ImmutableListMultimap;
5 import com.google.common.collect.ImmutableSet;
6 import com.google.common.collect.ImmutableSortedSet;
7 import com.google.common.collect.Multimap;
8 import com.google.common.collect.Sets;
9 import com.google.common.io.Files;
10 import com.ibm.icu.util.ICUUncheckedIOException;
11 import com.ibm.icu.util.Output;
12 import java.io.File;
13 import java.io.FileNotFoundException;
14 import java.io.IOException;
15 import java.io.PrintWriter;
16 import java.util.Arrays;
17 import java.util.Collection;
18 import java.util.HashSet;
19 import java.util.LinkedHashSet;
20 import java.util.Map;
21 import java.util.Objects;
22 import java.util.Set;
23 import java.util.TreeMap;
24 import java.util.TreeSet;
25 import java.util.regex.Matcher;
26 import java.util.regex.Pattern;
27 import org.unicode.cldr.tool.Option.Options;
28 import org.unicode.cldr.tool.Option.Params;
29 import org.unicode.cldr.util.AnnotationUtil;
30 import org.unicode.cldr.util.CLDRConfig;
31 import org.unicode.cldr.util.CLDRFile;
32 import org.unicode.cldr.util.CLDRPaths;
33 import org.unicode.cldr.util.CldrUtility;
34 import org.unicode.cldr.util.DtdType;
35 import org.unicode.cldr.util.Factory;
36 import org.unicode.cldr.util.GlossonymConstructor;
37 import org.unicode.cldr.util.InternalCldrException;
38 import org.unicode.cldr.util.Level;
39 import org.unicode.cldr.util.LocaleIDParser;
40 import org.unicode.cldr.util.LocaleNames;
41 import org.unicode.cldr.util.LogicalGrouping;
42 import org.unicode.cldr.util.SupplementalDataInfo;
43 import org.unicode.cldr.util.SupplementalDataInfo.ParentLocaleComponent;
44 import org.unicode.cldr.util.XMLSource;
45 import org.unicode.cldr.util.XPathParts;
46 
47 public class GenerateProductionData {
48     private static boolean DEBUG = false;
49     private static boolean VERBOSE = false;
50     private static Matcher FILE_MATCH = null;
51 
52     private static String SOURCE_COMMON_DIR = null;
53     private static String DEST_COMMON_DIR = null;
54 
55     private static boolean ADD_LOGICAL_GROUPS = false;
56     private static boolean ADD_DATETIME = false;
57     private static boolean ADD_SIDEWAYS = false;
58     private static boolean ADD_ROOT = false;
59     private static boolean INCLUDE_COMPREHENSIVE = false;
60     private static boolean CONSTRAINED_RESTORATION = false;
61 
62     private static final Set<String> NON_XML =
63             ImmutableSet.of("dtd", "properties", "testData", "uca");
64     private static final Set<String> COPY_ANYWAY =
65             ImmutableSet.of(
66                     "casing",
67                     "collation"); // don't want to "clean up", makes format difficult to use
68     private static final SupplementalDataInfo SDI =
69             CLDRConfig.getInstance().getSupplementalDataInfo();
70 
71     enum MyOptions {
72         sourceDirectory(
73                 new Params()
74                         .setHelp("source common directory")
75                         .setDefault(CLDRPaths.COMMON_DIRECTORY)
76                         .setMatch(".*")),
77         destinationDirectory(
78                 new Params()
79                         .setHelp("destination common directory")
80                         .setDefault(CLDRPaths.STAGING_DIRECTORY + "production/common")
81                         .setMatch(".*")),
82         logicalGroups(
83                 new Params()
84                         .setHelp("add path/values for logical groups")
85                         .setDefault("true")
86                         .setMatch("true|false")),
87         time(
88                 new Params()
89                         .setHelp("add path/values for stock date/time/datetime")
90                         .setDefault("true")
91                         .setMatch("true|false")),
92         Sideways(
93                 new Params()
94                         .setHelp("add path/values for sideways inheritance")
95                         .setDefault("true")
96                         .setMatch("true|false")),
97         root(
98                 new Params()
99                         .setHelp("add path/values for root and code-fallback")
100                         .setDefault("true")
101                         .setMatch("true|false")),
102         constrainedRestoration(
103                 new Params()
104                         .setHelp("only add inherited paths that were in original file")
105                         .setDefault("true")
106                         .setMatch("true|false")),
107         includeComprehensive(
108                 new Params()
109                         .setHelp("exclude comprehensive paths — otherwise just to modern level")
110                         .setDefault("true")
111                         .setMatch("true|false")),
112         verbose(new Params().setHelp("verbose debugging messages")),
113         Debug(new Params().setHelp("debug")),
114         fileMatch(new Params().setHelp("regex to match patterns").setMatch(".*")),
115         ;
116 
117         // BOILERPLATE TO COPY
118         final Option option;
119 
MyOptions(Params params)120         private MyOptions(Params params) {
121             option = new Option(this, params);
122         }
123 
124         private static Options myOptions = new Options();
125 
126         static {
127             for (MyOptions option : MyOptions.values()) {
myOptions.add(option, option.option)128                 myOptions.add(option, option.option);
129             }
130         }
131 
parse(String[] args, boolean showArguments)132         private static Set<String> parse(String[] args, boolean showArguments) {
133             return myOptions.parse(MyOptions.values()[0], args, true);
134         }
135     }
136 
main(String[] args)137     public static void main(String[] args) {
138         // TODO rbnf and segments don't have modern coverage; fix there.
139 
140         MyOptions.parse(args, true);
141         SOURCE_COMMON_DIR = MyOptions.sourceDirectory.option.getValue();
142         DEST_COMMON_DIR = MyOptions.destinationDirectory.option.getValue();
143 
144         // debugging
145         VERBOSE = MyOptions.verbose.option.doesOccur();
146         DEBUG = MyOptions.Debug.option.doesOccur();
147         String fileMatch = MyOptions.fileMatch.option.getValue();
148         if (fileMatch != null) {
149             FILE_MATCH = Pattern.compile(fileMatch).matcher("");
150         }
151 
152         // controls for minimization
153         ADD_LOGICAL_GROUPS = "true".equalsIgnoreCase(MyOptions.logicalGroups.option.getValue());
154         ADD_DATETIME = "true".equalsIgnoreCase(MyOptions.time.option.getValue());
155         ADD_SIDEWAYS = "true".equalsIgnoreCase(MyOptions.Sideways.option.getValue());
156         ADD_ROOT = "true".equalsIgnoreCase(MyOptions.root.option.getValue());
157 
158         // constraints
159         INCLUDE_COMPREHENSIVE =
160                 "true".equalsIgnoreCase(MyOptions.includeComprehensive.option.getValue());
161         CONSTRAINED_RESTORATION =
162                 "true".equalsIgnoreCase(MyOptions.constrainedRestoration.option.getValue());
163 
164         // get directories
165 
166         Map<File, File> specialDirectories = new TreeMap<>();
167 
168         Arrays.asList(DtdType.values())
169                 // .parallelStream()
170                 // .unordered()
171                 .forEach(
172                         type -> {
173                             boolean isLdmlDtdType = type == DtdType.ldml;
174 
175                             // bit of a hack, using the ldmlICU — otherwise unused! — to get the
176                             // nonXML files.
177                             Set<String> directories =
178                                     (type == DtdType.ldmlICU) ? NON_XML : type.directories;
179 
180                             for (String dir : directories) {
181                                 File sourceDir = new File(SOURCE_COMMON_DIR, dir);
182                                 File destinationDir = new File(DEST_COMMON_DIR, dir);
183                                 Stats stats = new Stats();
184                                 copyFilesAndReturnIsEmpty(
185                                         sourceDir, destinationDir, null, isLdmlDtdType, stats);
186                                 if (directoryIsSpecial(sourceDir.getAbsolutePath())) {
187                                     specialDirectories.put(sourceDir, destinationDir);
188                                 }
189                             }
190                         });
191 
192         for (File source : specialDirectories.keySet()) {
193             File dest = specialDirectories.get(source);
194             doubleCheckSpecialPaths(source, dest);
195         }
196     }
197 
198     private static class Stats {
199         long files;
200         long removed;
201         long retained;
202         long remaining;
203 
clear()204         Stats clear() {
205             files = removed = retained = remaining = 0;
206             return this;
207         }
208 
209         @Override
toString()210         public String toString() {
211             return "files="
212                     + files
213                     + (removed + retained + remaining == 0
214                             ? ""
215                             : "; removed="
216                                     + removed
217                                     + "; retained="
218                                     + retained
219                                     + "; remaining="
220                                     + remaining);
221         }
222 
showNonZero(String label)223         public void showNonZero(String label) {
224             if (removed + retained + remaining != 0) {
225                 System.out.println(label + toString());
226             }
227         }
228     }
229 
230     /**
231      * Copy files in directories, recursively.
232      *
233      * @param sourceFile
234      * @param destinationFile
235      * @param factory
236      * @param isLdmlDtdType
237      * @param stats
238      * @return true if the file is an ldml file with empty content.
239      */
copyFilesAndReturnIsEmpty( File sourceFile, File destinationFile, Factory factory, boolean isLdmlDtdType, final Stats stats)240     private static boolean copyFilesAndReturnIsEmpty(
241             File sourceFile,
242             File destinationFile,
243             Factory factory,
244             boolean isLdmlDtdType,
245             final Stats stats) {
246         if (sourceFile.isDirectory()) {
247 
248             System.out.println(sourceFile + " => " + destinationFile);
249             if (!destinationFile.mkdirs()) {
250                 // if created, remove old contents
251                 Arrays.stream(destinationFile.listFiles()).forEach(File::delete);
252             }
253 
254             Set<String> sorted = new TreeSet<>();
255             sorted.addAll(Arrays.asList(sourceFile.list()));
256 
257             if (COPY_ANYWAY.contains(sourceFile.getName())) { // special cases
258                 isLdmlDtdType = false;
259             }
260             // reset factory for directory
261             factory = null;
262             if (isLdmlDtdType) {
263                 // if the factory is empty, then we just copy files
264                 factory = Factory.make(sourceFile.toString(), ".*");
265             }
266             boolean isMainDir = factory != null && sourceFile.getName().contentEquals("main");
267             boolean isRbnfDir = factory != null && sourceFile.getName().contentEquals("rbnf");
268             boolean isAnnotationsDir =
269                     factory != null && sourceFile.getName().startsWith("annotations");
270 
271             Set<String> emptyLocales = new HashSet<>();
272             final Stats stats2 = new Stats();
273             final Factory theFactory = factory;
274             final boolean isLdmlDtdType2 = isLdmlDtdType;
275             sorted
276                     // .parallelStream()
277                     .forEach(
278                     file -> {
279                         File sourceFile2 = new File(sourceFile, file);
280                         File destinationFile2 = new File(destinationFile, file);
281                         if (VERBOSE) System.out.println("\t" + file);
282 
283                         // special step to just copy certain files like main/root.xml file
284                         Factory currFactory = theFactory;
285                         if (isMainDir) {
286                             if (file.equals("root.xml")) {
287                                 currFactory = null;
288                             }
289                         } else if (isRbnfDir) {
290                             currFactory = null;
291                         }
292 
293                         // when the currFactory is null, we just copy files as-is
294                         boolean isEmpty =
295                                 copyFilesAndReturnIsEmpty(
296                                         sourceFile2,
297                                         destinationFile2,
298                                         currFactory,
299                                         isLdmlDtdType2,
300                                         stats2);
301                         if (isEmpty) { // only happens for ldml
302                             emptyLocales.add(getLocaleIdFromFileName(file));
303                         }
304                     });
305             stats2.showNonZero("\tTOTAL:\t");
306             // if there are empty ldml files, AND we aren't in /main/,
307             // then remove any without children
308             if (!emptyLocales.isEmpty() && !isMainDir) {
309                 Set<String> childless =
310                         getChildless(emptyLocales, factory.getAvailable(), isAnnotationsDir);
311                 if (!childless.isEmpty()) {
312                     if (VERBOSE)
313                         System.out.println(
314                                 "\t" + destinationFile + "\tRemoving empty locales:" + childless);
315                     childless.stream()
316                             .forEach(locale -> new File(destinationFile, locale + ".xml").delete());
317                 }
318             }
319             return false;
320         } else if (factory != null) {
321             String file = sourceFile.getName();
322             if (!file.endsWith(".xml")) {
323                 return false;
324             }
325             String localeId = getLocaleIdFromFileName(file);
326             if (FILE_MATCH != null) {
327                 if (!FILE_MATCH.reset(localeId).matches()) {
328                     return false;
329                 }
330             }
331             boolean isRoot = localeId.equals(LocaleNames.ROOT);
332 
333             CLDRFile cldrFileUnresolved = factory.make(localeId, false);
334             CLDRFile cldrFileResolved = factory.make(localeId, true);
335             boolean gotOne = false;
336             Set<String> toRemove = new TreeSet<>(); // TreeSet just makes debugging easier
337             Set<String> toRetain = new TreeSet<>();
338             Set<String> toRetainSpecial = new TreeSet<>();
339             Output<String> pathWhereFound = new Output<>();
340             Output<String> localeWhereFound = new Output<>();
341 
342             final boolean specialPathsAreRequired =
343                     areSpecialPathsRequired(localeId, sourceFile.toString());
344 
345             String debugPath =
346                     "//ldml/localeDisplayNames/languages/language[@type=\"en_US\"]"; // "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"power-kilowatt\"]/displayName";
347             boolean debugLocale = localeId.equals("pt");
348 
349             ImmutableSet<String> sortedPaths =
350                     ImmutableSortedSet.copyOf(cldrFileUnresolved); // sort for debugging
351 
352             for (String xpath : sortedPaths) {
353                 if (xpath.startsWith("//ldml/identity")) {
354                     continue;
355                 }
356                 if (debugPath != null && debugLocale && xpath.startsWith(debugPath)) {
357                     int debug = 0;
358                 }
359 
360                 String value = cldrFileUnresolved.getStringValue(xpath);
361                 if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) {
362                     toRemove.add(xpath);
363                     continue;
364                 }
365 
366                 // special-case the root values that are only for Survey Tool use
367 
368                 if (isRoot) {
369                     if (AnnotationUtil.pathIsAnnotation(xpath)) {
370                         toRemove.add(xpath);
371                         continue;
372                     }
373                 }
374 
375                 if (specialPathsAreRequired && pathIsSpecial(xpath)) {
376                     toRetainSpecial.add(xpath);
377                 }
378 
379                 // Remove items that are the same as their bailey values.
380                 // However, two optional parameters change what happens
381                 // if ADD_SIDEWAYS is true, then we check for paths equal (condidtionally, see the
382                 // method doc)
383                 // if ADD_ROOT is true, then we check for the found locale being root
384 
385                 String bailey =
386                         cldrFileResolved.getBaileyValue(xpath, pathWhereFound, localeWhereFound);
387                 if (value.equals(bailey)
388                         && (!ADD_SIDEWAYS
389                                 || pathEqualsOrIsOkAltVariantOf(
390                                         cldrFileResolved,
391                                         xpath,
392                                         pathWhereFound.value,
393                                         localeId,
394                                         localeWhereFound.value))
395                         && (!ADD_ROOT
396                                 || (!Objects.equals(XMLSource.ROOT_ID, localeWhereFound.value)
397                                         && !Objects.equals(
398                                                 XMLSource.CODE_FALLBACK_ID,
399                                                 localeWhereFound.value)))) {
400                     toRemove.add(xpath);
401                     continue;
402                 }
403 
404                 // remove level=comprehensive (under setting)
405 
406                 if (!INCLUDE_COMPREHENSIVE) {
407                     Level coverage = SDI.getCoverageLevel(xpath, localeId);
408                     if (coverage == Level.COMPREHENSIVE) {
409                         toRemove.add(xpath);
410                         continue;
411                     }
412                 }
413 
414                 // if we got all the way to here, we have a non-empty result
415 
416                 // check to see if we might need to flesh out logical groups
417                 // TODO Should be done in the converter tool!!
418                 if (ADD_LOGICAL_GROUPS && !LogicalGrouping.isOptional(cldrFileResolved, xpath)) {
419                     Set<String> paths = LogicalGrouping.getPaths(cldrFileResolved, xpath);
420                     if (paths != null && paths.size() > 1) {
421                         for (String possiblePath : paths) {
422                             // Unclear from API whether we need to do this filtering
423                             if (!LogicalGrouping.isOptional(cldrFileResolved, possiblePath)) {
424                                 toRetain.add(possiblePath);
425                             }
426                         }
427                     }
428                 }
429 
430                 // check to see if we might need to flesh out datetime.
431                 // TODO Should be done in the converter tool!!
432                 if (ADD_DATETIME && isDateTimePath(xpath)) {
433                     toRetain.addAll(dateTimePaths(xpath));
434                 }
435 
436                 // past the gauntlet
437                 gotOne = true;
438             }
439             if (specialPathsAreRequired) {
440                 addSpecialPathsIfMissing(toRetainSpecial);
441             }
442 
443             // we even add empty files, but can delete them back on the directory level.
444             try (PrintWriter pw = new PrintWriter(destinationFile)) {
445                 CLDRFile outCldrFile = cldrFileUnresolved.cloneAsThawed();
446 
447                 // Remove paths, but pull out the ones to retain
448                 // example:
449                 // toRemove == {a b c} // c may have ^^^ value
450                 // toRetain == {b c d} // d may have ^^^ value
451 
452                 if (DEBUG) {
453                     showIfNonZero(localeId, "removing", toRemove);
454                     showIfNonZero(localeId, "retaining", toRetain);
455                     showIfNonZero(localeId, "retaining for special paths", toRetainSpecial);
456                 }
457                 if (CONSTRAINED_RESTORATION) {
458                     toRetain.retainAll(toRemove); // only add paths that were there already
459                     // toRetain == {b c}
460                     if (DEBUG) {
461                         showIfNonZero(localeId, "constrained retaining", toRetain);
462                     }
463                 }
464                 // add "special" paths even if CONSTRAINED_RESTORATION
465                 toRetain.addAll(toRetainSpecial);
466 
467                 boolean changed0 = toRemove.removeAll(toRetain);
468                 // toRemove == {a}
469                 if (DEBUG && changed0) {
470                     showIfNonZero(localeId, "final removing", toRemove);
471                 }
472 
473                 boolean changed = toRetain.removeAll(toRemove);
474                 // toRetain = {b c d} or if constrained, {b c}
475                 if (DEBUG && changed) {
476                     showIfNonZero(localeId, "final retaining", toRetain);
477                 }
478 
479                 outCldrFile.removeAll(toRemove, false);
480                 if (DEBUG) {
481                     for (String xpath : toRemove) {
482                         System.out.println(
483                                 localeId
484                                         + ": removing: «"
485                                         + cldrFileUnresolved.getStringValue(xpath)
486                                         + "», "
487                                         + xpath);
488                     }
489                 }
490 
491                 // now set any null values to bailey values if not present
492                 for (String xpath : toRetain) {
493                     if (debugPath != null
494                             && localeId.equals(debugLocale)
495                             && xpath.equals(debugPath)) {
496                         int debug = 0;
497                     }
498                     String value = cldrFileResolved.getStringValue(xpath);
499                     if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) {
500                         throw new IllegalArgumentException(
501                                 localeId + ": " + value + " in value for " + xpath);
502                     } else {
503                         if (DEBUG) {
504                             String oldValue = cldrFileUnresolved.getStringValue(xpath);
505                             System.out.println(
506                                     "Restoring: «" + oldValue + "» ⇒ «" + value + "»\t" + xpath);
507                         }
508                         outCldrFile.add(xpath, value);
509                     }
510                 }
511 
512                 // double-check results
513                 int count = 0;
514                 for (String xpath : outCldrFile) {
515                     if (debugPath != null
516                             && localeId.equals(debugLocale)
517                             && xpath.equals(debugPath)) {
518                         int debug = 0;
519                     }
520                     String value = outCldrFile.getStringValue(xpath);
521                     if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) {
522                         throw new IllegalArgumentException(
523                                 localeId + ": " + value + " in value for " + xpath);
524                     }
525                 }
526 
527                 outCldrFile.write(pw);
528                 ++stats.files;
529                 stats.removed += toRemove.size();
530                 stats.retained += toRetain.size();
531                 stats.remaining += count;
532             } catch (FileNotFoundException e) {
533                 throw new ICUUncheckedIOException(
534                         "Can't copy " + sourceFile + " to " + destinationFile + " — ", e);
535             }
536             return !gotOne;
537         } else {
538             if (FILE_MATCH != null) {
539                 String file = sourceFile.getName();
540                 int dotPos = file.lastIndexOf('.');
541                 String baseName = dotPos >= 0 ? file.substring(0, file.length() - dotPos) : file;
542                 if (!FILE_MATCH.reset(baseName).matches()) {
543                     return false;
544                 }
545             }
546             // for now, just copy
547             ++stats.files;
548             copyFiles(sourceFile, destinationFile);
549             return false;
550         }
551     }
552 
getLocaleIdFromFileName(String file)553     private static String getLocaleIdFromFileName(String file) {
554         return file.substring(0, file.length() - 4); // drop ".xml"
555     }
556 
557     /**
558      * Are any "special paths" required to be explicitly included for this locale, in this
559      * directory?
560      *
561      * <p>Currently this requirement applies only to Arabic defaultNumberingSystem, only in
562      * common/main
563      *
564      * @param localeId the locale ID such as "ar_KM"
565      * @param directory the string describing the source directory; currently only "common/main" has
566      *     special paths
567      * @return true if required, else false
568      */
areSpecialPathsRequired(String localeId, String directory)569     private static boolean areSpecialPathsRequired(String localeId, String directory) {
570 
571         return localeIsSpecial(localeId) && directoryIsSpecial(directory);
572     }
573 
directoryIsSpecial(String directory)574     private static boolean directoryIsSpecial(String directory) {
575         return directory.contains("common/main");
576     }
577 
localeIsSpecial(String localeId)578     private static boolean localeIsSpecial(String localeId) {
579         return localeId.equals("ar") || (localeId.startsWith("ar_") && !"ar_001".equals(localeId));
580     }
581 
582     private static final String[] SPECIAL_PATHS =
583             new String[] {
584                 "//ldml/numbers/defaultNumberingSystem",
585                 "//ldml/numbers/defaultNumberingSystem[@alt=\"latn\"]"
586             };
587     private static final Set<String> SPECIAL_PATH_SET = new TreeSet<>(Arrays.asList(SPECIAL_PATHS));
588 
589     /**
590      * Is the given path a "special path" required to be explicitly included?
591      *
592      * @param xpath the path
593      * @return true if this particular path is required, else false
594      */
pathIsSpecial(String xpath)595     private static boolean pathIsSpecial(String xpath) {
596         return SPECIAL_PATH_SET.contains(xpath);
597     }
598 
addSpecialPathsIfMissing(Set<String> toRetainSpecial)599     private static void addSpecialPathsIfMissing(Set<String> toRetainSpecial) {
600         for (String xpath : SPECIAL_PATH_SET) {
601             if (!toRetainSpecial.contains(xpath)) {
602                 toRetainSpecial.add(xpath);
603             }
604         }
605     }
606 
607     /**
608      * Confirm that a file (in the destination) is present for each "special" locale (in the
609      * source(), and that each such destination file contains all the required "special" paths
610      *
611      * @param sourceDir a directory like ".../common/main"
612      * @param destDir a directory
613      */
doubleCheckSpecialPaths(File sourceDir, File destDir)614     private static void doubleCheckSpecialPaths(File sourceDir, File destDir) {
615         Set<String> sorted = new TreeSet<>();
616         sorted.addAll(Arrays.asList(sourceDir.list()));
617         Factory factory = Factory.make(destDir.toString(), ".*");
618         sorted.forEach(
619                 file -> {
620                     doubleCheckLocale(sourceDir, destDir, file, factory);
621                 });
622     }
623 
doubleCheckLocale( File sourceDir, File destDir, String file, Factory factory)624     private static void doubleCheckLocale(
625             File sourceDir, File destDir, String file, Factory factory) {
626         if (!file.endsWith(".xml")) {
627             return;
628         }
629         String localeId = getLocaleIdFromFileName(file);
630         if (!localeIsSpecial(localeId)) {
631             return;
632         }
633         File destFile = new File(destDir, file);
634         if (!destFile.exists()) {
635             throw new InternalCldrException("doubleCheckLocale FILE NOT FOUND: " + destFile);
636         }
637         // Note: factory.make will fail here unless ../common/dtd/ldml.dtd exists in relation to the
638         // destination folder
639         CLDRFile cldrFileUnresolved = factory.make(localeId, false);
640         for (String xpath : SPECIAL_PATH_SET) {
641             String value = cldrFileUnresolved.getStringValue(xpath);
642             if (value == null) {
643                 throw new InternalCldrException(
644                         "Locale " + localeId + " missing required special path " + xpath);
645             }
646             if (CldrUtility.INHERITANCE_MARKER.equals(value)) {
647                 throw new InternalCldrException(
648                         "Locale "
649                                 + localeId
650                                 + " has INHERITANCE_MARKER for required special path "
651                                 + xpath);
652             }
653         }
654     }
655 
showIfNonZero(String localeId, String title, Set<String> toRemove)656     private static void showIfNonZero(String localeId, String title, Set<String> toRemove) {
657         if (toRemove.size() != 0) {
658             System.out.println(localeId + ": " + title + ": " + toRemove.size());
659         }
660     }
661 
662     /**
663      * Exceptions for generating production data, because the results would not pass
664      * CompareResolved.
665      */
666     static final Multimap<String, String> LOCALE_TO_PATH_EXCEPTIONS =
667             ImmutableListMultimap.<String, String>builder()
668                     .put(
669                             "oc_ES",
670                             "//ldml/localeDisplayNames/territories/territory[@type=\"HK\"][@alt=\"short\"]")
671                     .put(
672                             "zh_Hant_MO",
673                             "//ldml/localeDisplayNames/languages/language[@type=\"yue\"][@alt=\"menu\"]")
674                     .put(
675                             "zh_Hant_MO",
676                             "//ldml/localeDisplayNames/territories/territory[@type=\"CI\"][@alt=\"variant\"]")
677                     .put(
678                             "zh_Hant_HK",
679                             "//ldml/localeDisplayNames/languages/language[@type=\"yue\"][@alt=\"menu\"]")
680                     .put(
681                             "zh_Hant_HK",
682                             "//ldml/localeDisplayNames/territories/territory[@type=\"CI\"][@alt=\"variant\"]")
683                     .put(
684                             "ru_BY",
685                             "//ldml/numbers/currencies/currency[@type=\"RUR\"]/symbol[@alt=\"narrow\"]")
686                     .put(
687                             "oc_ES",
688                             "//ldml/localeDisplayNames/territories/territory[@type=\"HK\"][@alt=\"short\"]")
689                     .put(
690                             "el_POLYTON",
691                             "//ldml/localeDisplayNames/territories/territory[@type=\"CI\"][@alt=\"variant\"]")
692                     .put(
693                             "be_TARASK",
694                             "//ldml/localeDisplayNames/languages/language[@type=\"az\"][@alt=\"short\"]")
695                     .build();
696 
697     /**
698      * Check if a path is equal, or if it is a suitable alt variant If it returns true, the value
699      * will be removed; false will retain it.
700      */
pathEqualsOrIsOkAltVariantOf( CLDRFile cldrFileResolved, String desiredPath, String foundPath, String localeId, String foundLocaleId)701     private static boolean pathEqualsOrIsOkAltVariantOf(
702             CLDRFile cldrFileResolved,
703             String desiredPath,
704             String foundPath,
705             String localeId,
706             String foundLocaleId) {
707         if (LOCALE_TO_PATH_EXCEPTIONS.containsEntry(localeId, desiredPath)) {
708             return false;
709         }
710         /*
711          * Protect against bad case, such as:
712          *
713          * pt      //ldml/localeDisplayNames/languages/language[@type="en_US"]                  ↑↑↑ (= inglês americano)
714          * pt      //ldml/localeDisplayNames/languages/language[@type="en_US"][@alt="short"]    inglês (EUA)
715          *
716          * pt_AO   //ldml/localeDisplayNames/languages/language[@type="en_US"][@alt="short"]    inglês (EUA)
717          *
718          * When processing pt, its short value disappears, because its value = lateral inherited (constructed) value from pt
719          * When processing pt_AO, its short value is also removed, because it is the same as the pt
720          * But then when it is constructed, its value =
721          *
722          */
723         if (desiredPath.equals(foundPath)) {
724             // TODO for a full fix, we need to check that the foundLocaleId/foundPath will not
725             // disappear when it is processed.
726             // For now, we are using the LOCALE_TO_PATH_EXCEPTIONS.
727             return true;
728         }
729         if (!foundLocaleId.equals(
730                 localeId)) { // extra condition on alt values; has to be found in the same locale
731             return false;
732         }
733         if (desiredPath.contains("type=\"en_GB\"") && desiredPath.contains("alt=")) {
734             int debug = 0;
735         }
736         if (foundPath == null || foundPath.equals(GlossonymConstructor.PSEUDO_PATH)) {
737             if (!LocaleIDParser.isL1(localeId)) {
738                 return true;
739             }
740             // We can do this, because the bailey value has already been checked.
741             // Since it isn't null, a null or PSEUDO_PATH indicates a constructed alt value.
742             return false;
743         }
744         XPathParts desiredPathParts = XPathParts.getFrozenInstance(desiredPath);
745         XPathParts foundPathParts = XPathParts.getFrozenInstance(foundPath);
746         if (desiredPathParts.size() != foundPathParts.size()) {
747             return false;
748         }
749         for (int e = 0; e < desiredPathParts.size(); ++e) {
750             String element1 = desiredPathParts.getElement(e);
751             String element2 = foundPathParts.getElement(e);
752             if (!element1.equals(element2)) {
753                 return false;
754             }
755             Map<String, String> attr1 = desiredPathParts.getAttributes(e);
756             Map<String, String> attr2 = foundPathParts.getAttributes(e);
757             if (attr1.equals(attr2)) {
758                 continue;
759             }
760             Set<String> keys1 = attr1.keySet();
761             Set<String> keys2 = attr2.keySet();
762             for (String attr : Sets.union(keys1, keys2)) {
763                 if (attr.equals("alt")) {
764                     continue;
765                 }
766                 if (!Objects.equals(attr1.get(attr), attr2.get(attr))) {
767                     return false;
768                 }
769             }
770         }
771         return true;
772     }
773 
isDateTimePath(String xpath)774     private static boolean isDateTimePath(String xpath) {
775         return xpath.startsWith("//ldml/dates/calendars/calendar")
776                 && xpath.contains("FormatLength[@type=");
777     }
778 
779     /**
780      * generate full dateTimePaths from any element
781      * //ldml/dates/calendars/calendar[@type="gregorian"]/dateFormats/dateFormatLength[@type=".*"]/dateFormat[@type="standard"]/pattern[@type="standard"]
782      * //ldml/dates/calendars/calendar[@type="gregorian"]/timeFormats/timeFormatLength[@type=".*"]/timeFormat[@type="standard"]/pattern[@type="standard"]
783      * //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/dateTimeFormatLength[@type=".*"]/dateTimeFormat[@type="standard"]/pattern[@type="standard"]
784      */
dateTimePaths(String xpath)785     private static Set<String> dateTimePaths(String xpath) {
786         LinkedHashSet<String> result = new LinkedHashSet<>();
787         String prefix = xpath.substring(0, xpath.indexOf(']') + 2); // get after ]/
788         for (String type : Arrays.asList("date", "time", "dateTime")) {
789             String pattern =
790                     prefix
791                             + "$XFormats/$XFormatLength[@type=\"$Y\"]/$XFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"
792                                     .replace("$X", type);
793             for (String width : Arrays.asList("full", "long", "medium", "short")) {
794                 result.add(pattern.replace("$Y", width));
795             }
796         }
797         return result;
798     }
799 
getChildless( Set<String> emptyLocales, Set<String> available, boolean isAnnotationsDir)800     private static Set<String> getChildless(
801             Set<String> emptyLocales, Set<String> available, boolean isAnnotationsDir) {
802         // first build the parent2child map
803         Multimap<String, String> parent2child = HashMultimap.create();
804         for (String locale : available) {
805             String parent = LocaleIDParser.getParent(locale);
806             if (parent != null) {
807                 parent2child.put(parent, locale);
808             }
809             if (isAnnotationsDir) {
810                 String simpleParent =
811                         LocaleIDParser.getParent(locale, ParentLocaleComponent.collations);
812                 if (simpleParent != null && (parent == null || simpleParent != parent)) {
813                     parent2child.put(simpleParent, locale);
814                 }
815             }
816         }
817 
818         // now cycle through the empties
819         Set<String> result = new HashSet<>();
820         for (String empty : emptyLocales) {
821             if (allChildrenAreEmpty(empty, emptyLocales, parent2child)) {
822                 result.add(empty);
823             }
824         }
825         return result;
826     }
827 
828     /**
829      * Recursively checks that all children are empty (including that there are no children)
830      *
831      * @param locale
832      * @param emptyLocales
833      * @param parent2child
834      * @return
835      */
allChildrenAreEmpty( String locale, Set<String> emptyLocales, Multimap<String, String> parent2child)836     private static boolean allChildrenAreEmpty(
837             String locale, Set<String> emptyLocales, Multimap<String, String> parent2child) {
838 
839         Collection<String> children = parent2child.get(locale);
840         for (String child : children) {
841             if (!emptyLocales.contains(child)) {
842                 return false;
843             }
844             if (!allChildrenAreEmpty(child, emptyLocales, parent2child)) {
845                 return false;
846             }
847         }
848         return true;
849     }
850 
copyFiles(File sourceFile, File destinationFile)851     private static void copyFiles(File sourceFile, File destinationFile) {
852         try {
853             Files.copy(sourceFile, destinationFile);
854         } catch (IOException e) {
855             System.err.println("Can't copy " + sourceFile + " to " + destinationFile + " — " + e);
856         }
857     }
858 }
859