• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static com.google.common.base.Preconditions.checkNotNull;
7 import static com.google.common.collect.ImmutableList.toImmutableList;
8 import static java.nio.charset.StandardCharsets.UTF_8;
9 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
10 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
11 import static org.unicode.cldr.api.CldrDataType.BCP47;
12 import static org.unicode.cldr.api.CldrDataType.LDML;
13 import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
14 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.BRKITR;
15 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.COLL;
16 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.CURR;
17 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LANG;
18 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LOCALES;
19 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RBNF;
20 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.REGION;
21 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT;
22 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE;
23 
24 import java.io.BufferedWriter;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.InputStreamReader;
28 import java.io.PrintWriter;
29 import java.nio.file.Files;
30 import java.nio.file.Path;
31 import java.util.*;
32 import java.util.function.Predicate;
33 import java.util.stream.Collectors;
34 import java.util.stream.Stream;
35 
36 import org.unicode.cldr.api.CldrData;
37 import org.unicode.cldr.api.CldrDataSupplier;
38 import org.unicode.cldr.api.CldrDataType;
39 import org.unicode.cldr.api.CldrPath;
40 import org.unicode.cldr.api.PathMatcher;
41 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
42 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuVersionInfo;
43 import org.unicode.icu.tool.cldrtoicu.localedistance.LocaleDistanceMapper;
44 import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper;
45 import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper;
46 import org.unicode.icu.tool.cldrtoicu.mapper.CollationMapper;
47 import org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapper;
48 import org.unicode.icu.tool.cldrtoicu.mapper.LocaleMapper;
49 import org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapper;
50 import org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapper;
51 import org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapper;
52 import org.unicode.icu.tool.cldrtoicu.mapper.SupplementalMapper;
53 import org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapper;
54 import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer;
55 
56 import com.google.common.base.CharMatcher;
57 import com.google.common.collect.HashMultimap;
58 import com.google.common.collect.ImmutableList;
59 import com.google.common.collect.ImmutableListMultimap;
60 import com.google.common.collect.ImmutableMap;
61 import com.google.common.collect.ImmutableSet;
62 import com.google.common.collect.LinkedListMultimap;
63 import com.google.common.collect.ListMultimap;
64 import com.google.common.collect.Maps;
65 import com.google.common.collect.SetMultimap;
66 import com.google.common.collect.Sets;
67 import com.google.common.io.CharStreams;
68 
69 /**
70  * The main converter tool for CLDR to ICU data. To run this tool, you need to supply a suitable
71  * {@link LdmlConverterConfig} instance. There is a simple {@code main()} method available in this
72  * class which can be invoked passing just the desired output directory and which relies on the
73  * presence of several system properties for the remainder of its parameters:
74  * <ul>
75  *     <li>CLDR_DIR: The root of the CLDR release from which CLDR data is read.
76  *     <li>ICU_DIR: The root of the ICU release from which additional "specials" XML data is read.
77  *     <li>CLDR_DTD_CACHE: A temporary directory with the various DTDs cached (this is a legacy
78  *         requirement from the underlying CLDR libraries and might go away one day).
79  * </ul>
80  */
81 public final class LdmlConverter {
82     // TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath).
83     private static final Predicate<CldrPath> GENDER_LIST_PATHS =
84         supplementalMatcher("gender");
85     private static final Predicate<CldrPath> LIKELY_SUBTAGS_PATHS =
86         supplementalMatcher("likelySubtags");
87     private static final Predicate<CldrPath> METAZONE_PATHS =
88         supplementalMatcher("metaZones", "primaryZones");
89     private static final Predicate<CldrPath> METADATA_PATHS =
90         supplementalMatcher("metadata");
91     private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS =
92         supplementalMatcher(
93             "calendarData",
94             "calendarPreferenceData",
95             "codeMappings",
96             "codeMappingsCurrency",
97             "idValidity",
98             "languageData",
99             "languageMatching",
100             "measurementData",
101             "parentLocales",
102             "subdivisionContainment",
103             "territoryContainment",
104             "territoryInfo",
105             "timeData",
106             "weekData",
107             "weekOfPreference");
108     private static final Predicate<CldrPath> CURRENCY_DATA_PATHS =
109         supplementalMatcher("currencyData");
110     private static final Predicate<CldrPath> UNITS_DATA_PATHS =
111         supplementalMatcher(
112             "convertUnits",
113             "unitConstants",
114             "unitQuantities",
115             "unitPreferenceData");
116     private static final Predicate<CldrPath> GRAMMATICAL_FEATURES_PATHS =
117         supplementalMatcher("grammaticalData");
118     private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS =
119         supplementalMatcher("numberingSystems");
120     private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS =
121         supplementalMatcher("windowsZones");
122 
supplementalMatcher(String... spec)123     private static Predicate<CldrPath> supplementalMatcher(String... spec) {
124         checkArgument(spec.length > 0, "must supply at least one matcher spec");
125         if (spec.length == 1) {
126             return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf;
127         }
128         return
129             Arrays.stream(spec)
130                 .map(s -> PathMatcher.of("//supplementalData/" + s))
131                 .map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf))
132                 .reduce(p -> false, Predicate::or);
133     }
134 
135     private static RbPath RB_PARENT = RbPath.of("%%Parent");
136     // The quotes below are only so we achieve parity with the manually written alias files.
137     // TODO: Remove unnecessary quotes once the migration to this code is complete.
138     private static RbPath RB_ALIAS = RbPath.of("\"%%ALIAS\"");
139     // Special path for adding to empty files which only exist to complete the parent chain.
140     // TODO: Confirm that this has no meaningful effect and unify "empty" file contents.
141     private static RbPath RB_EMPTY_ALIAS = RbPath.of("___");
142 
143     /**
144      * Output types defining specific subsets of the ICU data which can be converted separately.
145      * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to
146      * hide what are essentially implementation specific data splits.
147      */
148     public enum OutputType {
149         LOCALES(LDML),
150         BRKITR(LDML),
151         COLL(LDML),
152         RBNF(LDML),
153         DAY_PERIODS(SUPPLEMENTAL),
154         GENDER_LIST(SUPPLEMENTAL),
155         LIKELY_SUBTAGS(SUPPLEMENTAL),
156         SUPPLEMENTAL_DATA(SUPPLEMENTAL),
157         UNITS(SUPPLEMENTAL),
158         CURRENCY_DATA(SUPPLEMENTAL),
159         GRAMMATICAL_FEATURES(SUPPLEMENTAL),
160         METADATA(SUPPLEMENTAL),
161         META_ZONES(SUPPLEMENTAL),
162         NUMBERING_SYSTEMS(SUPPLEMENTAL),
163         PLURALS(SUPPLEMENTAL),
164         PLURAL_RANGES(SUPPLEMENTAL),
165         WINDOWS_ZONES(SUPPLEMENTAL),
166         TRANSFORMS(SUPPLEMENTAL),
167         LOCALE_DISTANCE(SUPPLEMENTAL),
168         VERSION(SUPPLEMENTAL),
169         KEY_TYPE_DATA(BCP47);
170 
171         public static final ImmutableSet<OutputType> ALL = ImmutableSet.copyOf(OutputType.values());
172 
173         private final CldrDataType type;
174 
OutputType(CldrDataType type)175         OutputType(CldrDataType type) {
176             this.type = checkNotNull(type);
177         }
178 
getCldrType()179         CldrDataType getCldrType() {
180             return type;
181         }
182     }
183 
184     // Map to convert the rather arbitrarily defined "output types" to the directories into which
185     // the data is written. This is only for "LDML" types since other mappers don't need to split
186     // data into multiple directories.
187     private static final ImmutableListMultimap<OutputType, IcuLocaleDir> TYPE_TO_DIR =
188         ImmutableListMultimap.<OutputType, IcuLocaleDir>builder()
189             .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE)
190             .putAll(OutputType.BRKITR, BRKITR)
191             .putAll(OutputType.COLL, COLL)
192             .putAll(OutputType.RBNF, RBNF)
193             .build();
194 
195     /** Converts CLDR data according to the given configuration. */
convert( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)196     public static void convert(
197         CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
198         new LdmlConverter(src, supplementalData, config).convertAll();
199     }
200 
201     // The supplier for all data to be converted.
202     private final CldrDataSupplier src;
203     // Supplemental data available to mappers if needed.
204     private final SupplementalData supplementalData;
205     // The configuration controlling conversion behaviour.
206     private final LdmlConverterConfig config;
207     // The set of expanded target locale IDs.
208     // TODO: Make available IDs include specials files (or fail if specials are not available).
209     private final ImmutableSet<String> availableIds;
210     // Transformer for locale data.
211     private final PathValueTransformer localeTransformer;
212     // Transformer for supplemental data.
213     private final PathValueTransformer supplementalTransformer;
214     // Header string to go into every ICU data and transliteration rule file (comment prefixes
215     // are not present and must be added by the code writing the file).
216     private final ImmutableList<String> fileHeader;
217 
LdmlConverter( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)218     private LdmlConverter(
219         CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
220         this.src = checkNotNull(src);
221         this.supplementalData = checkNotNull(supplementalData);
222         this.config = checkNotNull(config);
223         this.availableIds = ImmutableSet.copyOf(
224             Sets.intersection(supplementalData.getAvailableLocaleIds(), config.getAllLocaleIds()));
225         // Load the remaining path value transformers.
226         this.supplementalTransformer =
227             RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_supplemental.txt"),
228                 IcuFunctions.ALGORITHM_FN,
229                 IcuFunctions.DATE_FN,
230                 IcuFunctions.DAY_NUMBER_FN,
231                 IcuFunctions.EXP_FN,
232                 IcuFunctions.YMD_FN);
233         this.localeTransformer =
234             RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_locale.txt"),
235                 IcuFunctions.CONTEXT_TRANSFORM_INDEX_FN);
236         this.fileHeader = readLinesFromResource("/ldml2icu_header.txt");
237     }
238 
convertAll()239     private void convertAll() {
240         processLdml();
241         processSupplemental();
242         if (config.emitReport()) {
243             System.out.println("Supplemental Data Transformer=" + supplementalTransformer);
244             System.out.println("Locale Data Transformer=" + localeTransformer);
245         }
246     }
247 
readLinesFromResource(String name)248     private static ImmutableList<String> readLinesFromResource(String name) {
249         try (InputStream in = LdmlConverter.class.getResourceAsStream(name)) {
250             return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8)));
251         } catch (IOException e) {
252             throw new RuntimeException("cannot read resource: " + name, e);
253         }
254     }
255 
loadSpecialsData(String localeId)256     private Optional<CldrData> loadSpecialsData(String localeId) {
257         String expected = localeId + ".xml";
258         try (Stream<Path> files = Files.walk(config.getSpecialsDir())) {
259             Set<Path> xmlFiles = files
260                 .filter(Files::isRegularFile)
261                 .filter(f -> f.getFileName().toString().equals(expected))
262                 .collect(Collectors.toSet());
263             return !xmlFiles.isEmpty()
264                 ? Optional.of(
265                 CldrDataSupplier.forCldrFiles(LDML, config.getMinimumDraftStatus(), xmlFiles))
266                 : Optional.empty();
267         } catch (IOException e) {
268             throw new RuntimeException(
269                 "error processing specials directory: " + config.getSpecialsDir(), e);
270         }
271     }
272 
processLdml()273     private void processLdml() {
274         ImmutableList<IcuLocaleDir> splitDirs =
275             config.getOutputTypes().stream()
276                 .filter(t -> t.getCldrType() == LDML)
277                 .flatMap(t -> TYPE_TO_DIR.get(t).stream())
278                 .collect(toImmutableList());
279         if (splitDirs.isEmpty()) {
280             return;
281         }
282 
283         String cldrVersion = config.getVersionInfo().getCldrVersion();
284 
285         Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>();
286         splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion)));
287 
288         SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create();
289         Path baseDir = config.getOutputDir();
290 
291         System.out.println("processing standard ldml files");
292         for (String id : config.getAllLocaleIds()) {
293             // Skip "target" IDs that are aliases (they are handled later).
294             if (!availableIds.contains(id)) {
295                 continue;
296             }
297             // TODO: Remove the following skip when ICU-20997 is fixed
298             if (id.contains("VALENCIA") || id.contains("TARASK")) {
299                 System.out.println("(skipping " + id + " until ICU-20997 is fixed)");
300                 continue;
301             }
302 
303             IcuData icuData = new IcuData(id, true);
304 
305             Optional<CldrData> specials = loadSpecialsData(id);
306             CldrData unresolved = src.getDataForLocale(id, UNRESOLVED);
307 
308             BreakIteratorMapper.process(icuData, unresolved, specials);
309             CollationMapper.process(icuData, unresolved, specials, cldrVersion);
310             RbnfMapper.process(icuData, unresolved, specials);
311 
312             CldrData resolved = src.getDataForLocale(id, RESOLVED);
313             Optional<String> defaultCalendar = supplementalData.getDefaultCalendar(id);
314             LocaleMapper.process(
315                 icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar);
316 
317             ListMultimap<IcuLocaleDir, RbPath> splitPaths = LinkedListMultimap.create();
318             for (RbPath p : icuData.getPaths()) {
319                 String rootName = getBaseSegmentName(p.getSegment(0));
320                 splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p);
321             }
322 
323             Optional<String> parent = supplementalData.getExplicitParentLocaleOf(id);
324             // We always write base languages (even if empty).
325             boolean isBaseLanguage = !id.contains("_");
326             // Run through all directories (not just the keySet() of the split path map) since we
327             // sometimes write empty files.
328             for (IcuLocaleDir dir : splitDirs) {
329                 Set<String> targetIds = config.getTargetLocaleIds(dir);
330                 if (!targetIds.contains(id)) {
331                     if (!splitPaths.get(dir).isEmpty()) {
332                         System.out.format(
333                             "target IDs for %s does not contain %s, but it has data: %s\n",
334                             dir, id, splitPaths.get(dir));
335                     }
336                     continue;
337                 }
338 
339                 Path outDir = baseDir.resolve(dir.getOutputDir());
340                 IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback());
341 
342                 // The split data can still be empty for this directory, but that's expected (it
343                 // might only be written because it has an explicit parent added below).
344                 splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p)));
345 
346                 // If we add an explicit parent locale, it forces the data to be written. This is
347                 // where we check for forced overrides of the parent relationship (which is a per
348                 // directory thing).
349                 getIcuParent(id, parent, dir).ifPresent(p -> {
350                     splitData.add(RB_PARENT, p);
351                     graphMetadata.get(dir).addParent(id, p);
352                 });
353 
354                 if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) {
355                     if (id.equals("root")) {
356                         splitData.setVersion(cldrVersion);
357                     }
358                     write(splitData, outDir, false);
359                     writtenLocaleIds.put(dir, id);
360                 }
361             }
362         }
363 
364         System.out.println("processing alias ldml files");
365         for (IcuLocaleDir dir : splitDirs) {
366             Path outDir = baseDir.resolve(dir.getOutputDir());
367             Set<String> targetIds = config.getTargetLocaleIds(dir);
368             DependencyGraph depGraph = graphMetadata.get(dir);
369 
370             // TODO: Maybe calculate alias map directly into the dependency graph?
371             Map<String, String> aliasMap = getAliasMap(targetIds, dir);
372             aliasMap.forEach((s, t) -> {
373                 depGraph.addAlias(s, t);
374                 writeAliasFile(s, t, outDir);
375                 // It's only important to record which alias files are written because of forced
376                 // aliases, but since it's harmless otherwise, we just do it unconditionally.
377                 // Normal alias files don't affect the empty file calculation, but forced ones can.
378                 writtenLocaleIds.put(dir, s);
379             });
380 
381             calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values())
382                 .forEach(id -> writeEmptyFile(id, outDir, aliasMap.values()));
383 
384             writeDependencyGraph(outDir, depGraph);
385         }
386     }
387 
388 
389     private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%");
390 
391     // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type
392     // annotations (e.g. "languages:intvector"). We strip these when considering the element name.
getBaseSegmentName(String segment)393     private static String getBaseSegmentName(String segment) {
394         int idx = PATH_MODIFIER.indexIn(segment);
395         return idx == -1 ? segment : segment.substring(0, idx);
396     }
397 
398     /*
399      * There are four reasons for treating a locale ID as an alias.
400      * 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS").
401      * 2: It has no CLDR data but is missing a script subtag.
402      * 3: It is one of the special "phantom" alias which cannot be represented normally
403      *    and must be manually mapped (e.g. legacy locale IDs which don't even parse).
404      * 4: It is a "super special" forced alias, which might replace existing aliases in
405      *    some output directories.
406      */
getAliasMap(Set<String> localeIds, IcuLocaleDir dir)407     private Map<String, String> getAliasMap(Set<String> localeIds, IcuLocaleDir dir) {
408         // Even forced aliases only apply if they are in the set of locale IDs for the directory.
409         Map<String, String> forcedAliases =
410             Maps.filterKeys(config.getForcedAliases(dir), localeIds::contains);
411 
412         Map<String, String> aliasMap = new LinkedHashMap<>();
413         for (String id : localeIds) {
414             if (forcedAliases.containsKey(id)) {
415                 // Forced aliases will be added later and don't need to be processed here. This
416                 // is especially necessary if the ID is not structurally valid (e.g. "no_NO_NY")
417                 // since that cannot be processed by the code below.
418                 continue;
419             }
420             String canonicalId = supplementalData.replaceDeprecatedTags(id);
421             if (!canonicalId.equals(id)) {
422                 // If the canonical form of an ID differs from the requested ID, the this is an
423                 // alias, and just needs to point to the canonical ID.
424                 aliasMap.put(id, canonicalId);
425                 continue;
426             }
427             if (availableIds.contains(id)) {
428                 // If it's canonical and supported, it's not an alias.
429                 continue;
430             }
431             // If the requested locale is not supported, maximize it and alias to that.
432             String maximizedId = supplementalData.maximize(id)
433                 .orElseThrow(() -> new IllegalArgumentException("unsupported locale ID: " + id));
434             // We can't alias to ourselves and we shouldn't be here is the ID was already maximal.
435             checkArgument(!maximizedId.equals(id), "unsupported maximized locale ID: %s", id);
436             aliasMap.put(id, maximizedId);
437         }
438         // Important that we overwrite entries which might already exist here, since we might have
439         // already calculated a "natural" alias for something that we want to force (and we should
440         // replace the existing target, since that affects how we determine empty files later).
441         aliasMap.putAll(forcedAliases);
442         return aliasMap;
443     }
444 
445     /*
446      * Helper to determine the correct parent ID to be written into the ICU data file. The rules
447      * are:
448      * 1: If no forced parent exists (common) write the explicit parent (if that exists)
449      * 2: If a forced parent exists, but the forced value is what you would get by just truncating
450      *    the current locale ID, write nothing (ICU libraries truncate when no parent is set).
451      * 3: Write the forced parent (this is an exceptional case, and may not even occur in data).
452      */
getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir)453     private Optional<String> getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir) {
454         String forcedParentId = config.getForcedParents(dir).get(id);
455         if (forcedParentId == null) {
456             return parent;
457         }
458         return id.contains("_") && forcedParentId.regionMatches(0, id, 0, id.lastIndexOf('_'))
459             ? Optional.empty() : Optional.of(forcedParentId);
460     }
461 
processSupplemental()462     private void processSupplemental() {
463         for (OutputType type : config.getOutputTypes()) {
464             if (type.getCldrType() == LDML) {
465                 continue;
466             }
467             System.out.println("processing supplemental type " + type);
468             switch (type) {
469             case DAY_PERIODS:
470                 write(DayPeriodsMapper.process(src), "misc");
471                 break;
472 
473             case GENDER_LIST:
474                 processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false);
475                 break;
476 
477             case LIKELY_SUBTAGS:
478                 processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false);
479                 break;
480 
481             case SUPPLEMENTAL_DATA:
482                 processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true);
483                 break;
484 
485             case UNITS:
486                 processSupplemental("units", UNITS_DATA_PATHS, "misc", true);
487                 break;
488 
489             case CURRENCY_DATA:
490                 processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", false);
491                 break;
492 
493             case GRAMMATICAL_FEATURES:
494                 processSupplemental("grammaticalFeatures", GRAMMATICAL_FEATURES_PATHS, "misc", false);
495                 break;
496 
497             case METADATA:
498                 processSupplemental("metadata", METADATA_PATHS, "misc", false);
499                 break;
500 
501             case META_ZONES:
502                 processSupplemental("metaZones", METAZONE_PATHS, "misc", false);
503                 break;
504 
505             case NUMBERING_SYSTEMS:
506                 processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false);
507                 break;
508 
509             case PLURALS:
510                 write(PluralsMapper.process(src), "misc");
511                 break;
512 
513             case PLURAL_RANGES:
514                 write(PluralRangesMapper.process(src), "misc");
515                 break;
516 
517             case LOCALE_DISTANCE:
518                 write(LocaleDistanceMapper.process(src), "misc");
519                 break;
520 
521             case WINDOWS_ZONES:
522                 processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false);
523                 break;
524 
525             case TRANSFORMS:
526                 Path transformDir = createDirectory(config.getOutputDir().resolve("translit"));
527                 write(TransformsMapper.process(src, transformDir, fileHeader), transformDir, false);
528                 break;
529 
530             case VERSION:
531                 writeIcuVersionInfo();
532                 break;
533 
534             case KEY_TYPE_DATA:
535                 Bcp47Mapper.process(src).forEach(d -> write(d, "misc"));
536                 break;
537 
538             default:
539                 throw new AssertionError("Unsupported supplemental type: " + type);
540             }
541         }
542     }
543 
544     private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion");
545 
processSupplemental( String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion)546     private void processSupplemental(
547         String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) {
548         IcuData icuData =
549             SupplementalMapper.process(src, supplementalTransformer, label, paths);
550         // A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the
551         // supplemental data XML files.
552         if (addCldrVersion) {
553             // Not the same path as used by "setVersion()"
554             icuData.add(RB_CLDR_VERSION, config.getVersionInfo().getCldrVersion());
555         }
556         write(icuData, dir);
557     }
558 
writeAliasFile(String srcId, String destId, Path dir)559     private void writeAliasFile(String srcId, String destId, Path dir) {
560         IcuData icuData = new IcuData(srcId, true);
561         icuData.add(RB_ALIAS, destId);
562         // Allow overwrite for aliases since some are "forced" and overwrite existing targets.
563         // TODO: Maybe tighten this up so only forced aliases for existing targets are overwritten.
564         write(icuData, dir, true);
565     }
566 
writeEmptyFile(String id, Path dir, Collection<String> aliasTargets)567     private void writeEmptyFile(String id, Path dir, Collection<String> aliasTargets) {
568         IcuData icuData = new IcuData(id, true);
569         // TODO: Document the reason for this (i.e. why does it matter what goes into empty files?)
570         if (aliasTargets.contains(id)) {
571             icuData.setFileComment("generated alias target");
572             icuData.add(RB_EMPTY_ALIAS, "");
573         } else {
574             // These empty files only exist because the target of an alias has a parent locale
575             // which is itself not in the set of written ICU files. An "indirect alias target".
576             // No need to add data: Just write a resource bundle with an empty top-level table.
577         }
578         write(icuData, dir, false);
579     }
580 
writeIcuVersionInfo()581     private void writeIcuVersionInfo() {
582         IcuVersionInfo versionInfo = config.getVersionInfo();
583         IcuData versionData = new IcuData("icuver", false);
584         versionData.add(RbPath.of("ICUVersion"), versionInfo.getIcuVersion());
585         versionData.add(RbPath.of("DataVersion"), versionInfo.getIcuDataVersion());
586         versionData.add(RbPath.of("CLDRVersion"), versionInfo.getCldrVersion());
587         // Write file via non-helper methods since we need to include a legacy copyright.
588         Path miscDir = config.getOutputDir().resolve("misc");
589         createDirectory(miscDir);
590         ImmutableList<String> versionHeader = ImmutableList.<String>builder()
591             .addAll(fileHeader)
592             .add(
593                 "***************************************************************************",
594                 "*",
595                 "* Copyright (C) 2010-2016 International Business Machines",
596                 "* Corporation and others.  All Rights Reserved.",
597                 "*",
598                 "***************************************************************************")
599             .build();
600         IcuTextWriter.writeToFile(versionData, miscDir, versionHeader, false);
601     }
602 
603     // Commonest case for writing data files in "normal" directories.
write(IcuData icuData, String dir)604     private void write(IcuData icuData, String dir) {
605         write(icuData, config.getOutputDir().resolve(dir), false);
606     }
607 
write(IcuData icuData, Path dir, boolean allowOverwrite)608     private void write(IcuData icuData, Path dir, boolean allowOverwrite) {
609         createDirectory(dir);
610         IcuTextWriter.writeToFile(icuData, dir, fileHeader, allowOverwrite);
611     }
612 
createDirectory(Path dir)613     private Path createDirectory(Path dir) {
614         try {
615             Files.createDirectories(dir);
616         } catch (IOException e) {
617             throw new RuntimeException("cannot create directory: " + dir, e);
618         }
619         return dir;
620     }
621 
writeDependencyGraph(Path dir, DependencyGraph depGraph)622     private void writeDependencyGraph(Path dir, DependencyGraph depGraph) {
623         createDirectory(dir);
624         try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8);
625             PrintWriter out = new PrintWriter(w)) {
626             depGraph.writeJsonTo(out, fileHeader);
627             out.flush();
628         } catch (IOException e) {
629             throw new RuntimeException("cannot write dependency graph file: " + dir, e);
630         }
631     }
632 
633     // The set of IDs to process is:
634     // * any file that was written
635     // * any alias target (not written)
636     //
637     // From which we generate the complete "closure" under the "getParent()" function. This set
638     // contains all file (written or not) which need to exist to complete the locale hierarchy.
639     //
640     // Then we remove all the written files to just leave the ones that need to be generated.
641     // This is a simple and robust approach that handles things like "gaps" in non-aliased
642     // locale IDs, where an intermediate parent is not present.
calculateEmptyFiles( Set<String> writtenIds, Collection<String> aliasTargetIds)643     private ImmutableSet<String> calculateEmptyFiles(
644         Set<String> writtenIds, Collection<String> aliasTargetIds) {
645 
646         Set<String> seedIds = new HashSet<>(writtenIds);
647         seedIds.addAll(aliasTargetIds);
648         // Be nice and sort the output (makes easier debugging).
649         Set<String> allIds = new TreeSet<>();
650         for (String id : seedIds) {
651             while (!id.equals("root") && !allIds.contains(id)) {
652                 allIds.add(id);
653                 id = supplementalData.getParent(id);
654             }
655         }
656         return ImmutableSet.copyOf(Sets.difference(allIds, writtenIds));
657     }
658 
659     private static final ImmutableMap<String, IcuLocaleDir> LOCALE_SPLIT_INFO =
660         ImmutableMap.<String, IcuLocaleDir>builder()
661             // BRKITR
662             .put("boundaries", BRKITR)
663             .put("dictionaries", BRKITR)
664             .put("exceptions", BRKITR)
665             // COLL
666             .put("collations", COLL)
667             .put("depends", COLL)
668             .put("UCARules", COLL)
669             // CURR
670             .put("Currencies", CURR)
671             .put("CurrencyPlurals", CURR)
672             .put("CurrencyUnitPatterns", CURR)
673             .put("currencySpacing", CURR)
674             // LANG
675             .put("Keys", LANG)
676             .put("Languages", LANG)
677             .put("Scripts", LANG)
678             .put("Types", LANG)
679             .put("Variants", LANG)
680             .put("characterLabelPattern", LANG)
681             .put("codePatterns", LANG)
682             .put("localeDisplayPattern", LANG)
683             // RBNF
684             .put("RBNFRules", RBNF)
685             // REGION
686             .put("Countries", REGION)
687             // UNIT
688             .put("durationUnits", UNIT)
689             .put("units", UNIT)
690             .put("unitsShort", UNIT)
691             .put("unitsNarrow", UNIT)
692             // ZONE
693             .put("zoneStrings", ZONE)
694             .build();
695 }
696