• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static com.google.common.base.Preconditions.checkNotNull;
7 import static com.google.common.collect.ImmutableList.toImmutableList;
8 import static java.nio.charset.StandardCharsets.UTF_8;
9 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
10 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
11 import static org.unicode.cldr.api.CldrDataType.BCP47;
12 import static org.unicode.cldr.api.CldrDataType.LDML;
13 import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
14 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.BRKITR;
15 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.COLL;
16 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.CURR;
17 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LANG;
18 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LOCALES;
19 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RBNF;
20 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.REGION;
21 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT;
22 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE;
23 
24 import java.io.BufferedWriter;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.InputStreamReader;
28 import java.io.PrintWriter;
29 import java.nio.file.Files;
30 import java.nio.file.Path;
31 import java.util.*;
32 import java.util.function.Predicate;
33 import java.util.stream.Collectors;
34 import java.util.stream.Stream;
35 
36 import org.unicode.cldr.api.CldrData;
37 import org.unicode.cldr.api.CldrDataSupplier;
38 import org.unicode.cldr.api.CldrDataType;
39 import org.unicode.cldr.api.CldrPath;
40 import org.unicode.cldr.api.PathMatcher;
41 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
42 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuVersionInfo;
43 import org.unicode.icu.tool.cldrtoicu.localedistance.LocaleDistanceMapper;
44 import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper;
45 import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper;
46 import org.unicode.icu.tool.cldrtoicu.mapper.CollationMapper;
47 import org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapper;
48 import org.unicode.icu.tool.cldrtoicu.mapper.LocaleMapper;
49 import org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapper;
50 import org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapper;
51 import org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapper;
52 import org.unicode.icu.tool.cldrtoicu.mapper.SupplementalMapper;
53 import org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapper;
54 import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer;
55 
56 import com.google.common.base.CharMatcher;
57 import com.google.common.collect.HashMultimap;
58 import com.google.common.collect.ImmutableList;
59 import com.google.common.collect.ImmutableListMultimap;
60 import com.google.common.collect.ImmutableMap;
61 import com.google.common.collect.ImmutableSet;
62 import com.google.common.collect.LinkedListMultimap;
63 import com.google.common.collect.ListMultimap;
64 import com.google.common.collect.Maps;
65 import com.google.common.collect.SetMultimap;
66 import com.google.common.collect.Sets;
67 import com.google.common.io.CharStreams;
68 
69 /**
70  * The main converter tool for CLDR to ICU data. To run this tool, you need to supply a suitable
71  * {@link LdmlConverterConfig} instance. There is a simple {@code main()} method available in this
72  * class which can be invoked passing just the desired output directory and which relies on the
73  * presence of several system properties for the remainder of its parameters:
74  * <ul>
75  *     <li>CLDR_DIR: The root of the CLDR release from which CLDR data is read.
76  *     <li>ICU_DIR: The root of the ICU release from which additional "specials" XML data is read.
77  *     <li>CLDR_DTD_CACHE: A temporary directory with the various DTDs cached (this is a legacy
78  *         requirement from the underlying CLDR libraries and might go away one day).
79  * </ul>
80  */
81 public final class LdmlConverter {
82     // TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath).
83     private static final Predicate<CldrPath> GENDER_LIST_PATHS =
84         supplementalMatcher("gender");
85     private static final Predicate<CldrPath> LIKELY_SUBTAGS_PATHS =
86         supplementalMatcher("likelySubtags");
87     private static final Predicate<CldrPath> METAZONE_PATHS =
88         supplementalMatcher("metaZones", "primaryZones");
89     private static final Predicate<CldrPath> METADATA_PATHS =
90         supplementalMatcher("metadata");
91     private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS =
92         supplementalMatcher(
93             "calendarData",
94             "calendarPreferenceData",
95             "codeMappings",
96             "codeMappingsCurrency",
97             "idValidity",
98             "languageData",
99             "languageMatching",
100             "measurementData",
101             "parentLocales",
102             "personNamesDefaults",
103             "subdivisionContainment",
104             "territoryContainment",
105             "territoryInfo",
106             "timeData",
107             "weekData",
108             "weekOfPreference");
109     private static final Predicate<CldrPath> CURRENCY_DATA_PATHS =
110         supplementalMatcher("currencyData");
111     private static final Predicate<CldrPath> UNITS_DATA_PATHS =
112         supplementalMatcher(
113             "convertUnits",
114             "unitConstants",
115             "unitQuantities",
116             "unitPreferenceData");
117     private static final Predicate<CldrPath> GRAMMATICAL_FEATURES_PATHS =
118         supplementalMatcher("grammaticalData");
119     private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS =
120         supplementalMatcher("numberingSystems");
121     private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS =
122         supplementalMatcher("windowsZones");
123 
supplementalMatcher(String... spec)124     private static Predicate<CldrPath> supplementalMatcher(String... spec) {
125         checkArgument(spec.length > 0, "must supply at least one matcher spec");
126         if (spec.length == 1) {
127             return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf;
128         }
129         return
130             Arrays.stream(spec)
131                 .map(s -> PathMatcher.of("//supplementalData/" + s))
132                 .map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf))
133                 .reduce(p -> false, Predicate::or);
134     }
135 
136     private static RbPath RB_PARENT = RbPath.of("%%Parent");
137     // The quotes below are only so we achieve parity with the manually written alias files.
138     // TODO: Remove unnecessary quotes once the migration to this code is complete.
139     private static RbPath RB_ALIAS = RbPath.of("\"%%ALIAS\"");
140     // Special path for adding to empty files which only exist to complete the parent chain.
141     // TODO: Confirm that this has no meaningful effect and unify "empty" file contents.
142     private static RbPath RB_EMPTY_ALIAS = RbPath.of("___");
143 
144     /**
145      * Output types defining specific subsets of the ICU data which can be converted separately.
146      * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to
147      * hide what are essentially implementation specific data splits.
148      */
149     public enum OutputType {
150         LOCALES(LDML),
151         BRKITR(LDML),
152         COLL(LDML),
153         RBNF(LDML),
154         DAY_PERIODS(SUPPLEMENTAL),
155         GENDER_LIST(SUPPLEMENTAL),
156         LIKELY_SUBTAGS(SUPPLEMENTAL),
157         SUPPLEMENTAL_DATA(SUPPLEMENTAL),
158         UNITS(SUPPLEMENTAL),
159         CURRENCY_DATA(SUPPLEMENTAL),
160         GRAMMATICAL_FEATURES(SUPPLEMENTAL),
161         METADATA(SUPPLEMENTAL),
162         META_ZONES(SUPPLEMENTAL),
163         NUMBERING_SYSTEMS(SUPPLEMENTAL),
164         PLURALS(SUPPLEMENTAL),
165         PLURAL_RANGES(SUPPLEMENTAL),
166         WINDOWS_ZONES(SUPPLEMENTAL),
167         TRANSFORMS(SUPPLEMENTAL),
168         LOCALE_DISTANCE(SUPPLEMENTAL),
169         VERSION(SUPPLEMENTAL),
170         KEY_TYPE_DATA(BCP47);
171 
172         public static final ImmutableSet<OutputType> ALL = ImmutableSet.copyOf(OutputType.values());
173 
174         private final CldrDataType type;
175 
OutputType(CldrDataType type)176         OutputType(CldrDataType type) {
177             this.type = checkNotNull(type);
178         }
179 
getCldrType()180         CldrDataType getCldrType() {
181             return type;
182         }
183     }
184 
185     // Map to convert the rather arbitrarily defined "output types" to the directories into which
186     // the data is written. This is only for "LDML" types since other mappers don't need to split
187     // data into multiple directories.
188     private static final ImmutableListMultimap<OutputType, IcuLocaleDir> TYPE_TO_DIR =
189         ImmutableListMultimap.<OutputType, IcuLocaleDir>builder()
190             .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE)
191             .putAll(OutputType.BRKITR, BRKITR)
192             .putAll(OutputType.COLL, COLL)
193             .putAll(OutputType.RBNF, RBNF)
194             .build();
195 
196     /** Converts CLDR data according to the given configuration. */
convert( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)197     public static void convert(
198         CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
199         new LdmlConverter(src, supplementalData, config).convertAll();
200     }
201 
202     // The supplier for all data to be converted.
203     private final CldrDataSupplier src;
204     // Supplemental data available to mappers if needed.
205     private final SupplementalData supplementalData;
206     // The configuration controlling conversion behaviour.
207     private final LdmlConverterConfig config;
208     // The set of expanded target locale IDs.
209     // TODO: Make available IDs include specials files (or fail if specials are not available).
210     private final ImmutableSet<String> availableIds;
211     // Transformer for locale data.
212     private final PathValueTransformer localeTransformer;
213     // Transformer for supplemental data.
214     private final PathValueTransformer supplementalTransformer;
215     // Header string to go into every ICU data and transliteration rule file (comment prefixes
216     // are not present and must be added by the code writing the file).
217     private final ImmutableList<String> fileHeader;
218 
LdmlConverter( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)219     private LdmlConverter(
220         CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
221         this.src = checkNotNull(src);
222         this.supplementalData = checkNotNull(supplementalData);
223         this.config = checkNotNull(config);
224         this.availableIds = ImmutableSet.copyOf(
225             Sets.intersection(supplementalData.getAvailableLocaleIds(), config.getAllLocaleIds()));
226         // Load the remaining path value transformers.
227         this.supplementalTransformer =
228             RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_supplemental.txt"),
229                 IcuFunctions.ALGORITHM_FN,
230                 IcuFunctions.DATE_FN,
231                 IcuFunctions.DAY_NUMBER_FN,
232                 IcuFunctions.EXP_FN,
233                 IcuFunctions.YMD_FN);
234         this.localeTransformer =
235             RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_locale.txt"),
236                 IcuFunctions.CONTEXT_TRANSFORM_INDEX_FN);
237         this.fileHeader = readLinesFromResource("/ldml2icu_header.txt");
238     }
239 
convertAll()240     private void convertAll() {
241         processLdml();
242         processSupplemental();
243         if (config.emitReport()) {
244             System.out.println("Supplemental Data Transformer=" + supplementalTransformer);
245             System.out.println("Locale Data Transformer=" + localeTransformer);
246         }
247     }
248 
readLinesFromResource(String name)249     private static ImmutableList<String> readLinesFromResource(String name) {
250         try (InputStream in = LdmlConverter.class.getResourceAsStream(name)) {
251             return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8)));
252         } catch (IOException e) {
253             throw new RuntimeException("cannot read resource: " + name, e);
254         }
255     }
256 
loadSpecialsData(String localeId)257     private Optional<CldrData> loadSpecialsData(String localeId) {
258         String expected = localeId + ".xml";
259         try (Stream<Path> files = Files.walk(config.getSpecialsDir())) {
260             Set<Path> xmlFiles = files
261                 .filter(Files::isRegularFile)
262                 .filter(f -> f.getFileName().toString().equals(expected))
263                 .collect(Collectors.toSet());
264             return !xmlFiles.isEmpty()
265                 ? Optional.of(
266                 CldrDataSupplier.forCldrFiles(LDML, config.getMinimumDraftStatus(), xmlFiles))
267                 : Optional.empty();
268         } catch (IOException e) {
269             throw new RuntimeException(
270                 "error processing specials directory: " + config.getSpecialsDir(), e);
271         }
272     }
273 
processLdml()274     private void processLdml() {
275         ImmutableList<IcuLocaleDir> splitDirs =
276             config.getOutputTypes().stream()
277                 .filter(t -> t.getCldrType() == LDML)
278                 .flatMap(t -> TYPE_TO_DIR.get(t).stream())
279                 .collect(toImmutableList());
280         if (splitDirs.isEmpty()) {
281             return;
282         }
283 
284         String cldrVersion = config.getVersionInfo().getCldrVersion();
285 
286         Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>();
287         splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion)));
288 
289         SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create();
290         Path baseDir = config.getOutputDir();
291 
292         System.out.println("processing standard ldml files");
293         for (String id : config.getAllLocaleIds()) {
294             // Skip "target" IDs that are aliases (they are handled later).
295             if (!availableIds.contains(id)) {
296                 continue;
297             }
298             // TODO: Remove the following skip when ICU-20997 is fixed
299             if (id.contains("VALENCIA") || id.contains("TARASK")) {
300                 System.out.println("(skipping " + id + " until ICU-20997 is fixed)");
301                 continue;
302             }
303 
304             IcuData icuData = new IcuData(id, true);
305 
306             Optional<CldrData> specials = loadSpecialsData(id);
307             CldrData unresolved = src.getDataForLocale(id, UNRESOLVED);
308 
309             BreakIteratorMapper.process(icuData, unresolved, specials);
310             CollationMapper.process(icuData, unresolved, specials, cldrVersion);
311             RbnfMapper.process(icuData, unresolved, specials);
312 
313             CldrData resolved = src.getDataForLocale(id, RESOLVED);
314             Optional<String> defaultCalendar = supplementalData.getDefaultCalendar(id);
315             LocaleMapper.process(
316                 icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar);
317 
318             ListMultimap<IcuLocaleDir, RbPath> splitPaths = LinkedListMultimap.create();
319             for (RbPath p : icuData.getPaths()) {
320                 String rootName = getBaseSegmentName(p.getSegment(0));
321                 splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p);
322             }
323 
324             Optional<String> parent = supplementalData.getExplicitParentLocaleOf(id);
325             // We always write base languages (even if empty).
326             boolean isBaseLanguage = !id.contains("_");
327             // Run through all directories (not just the keySet() of the split path map) since we
328             // sometimes write empty files.
329             for (IcuLocaleDir dir : splitDirs) {
330                 Set<String> targetIds = config.getTargetLocaleIds(dir);
331                 if (!targetIds.contains(id)) {
332                     if (!splitPaths.get(dir).isEmpty()) {
333                         System.out.format(
334                             "target IDs for %s does not contain %s, but it has data: %s\n",
335                             dir, id, splitPaths.get(dir));
336                     }
337                     continue;
338                 }
339 
340                 Path outDir = baseDir.resolve(dir.getOutputDir());
341                 IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback());
342 
343                 // The split data can still be empty for this directory, but that's expected (it
344                 // might only be written because it has an explicit parent added below).
345                 splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p)));
346 
347                 // If we add an explicit parent locale, it forces the data to be written. This is
348                 // where we check for forced overrides of the parent relationship (which is a per
349                 // directory thing).
350                 getIcuParent(id, parent, dir).ifPresent(p -> {
351                     splitData.add(RB_PARENT, p);
352                     graphMetadata.get(dir).addParent(id, p);
353                 });
354 
355                 if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) {
356                     if (id.equals("root")) {
357                         splitData.setVersion(cldrVersion);
358                     }
359                     write(splitData, outDir, false);
360                     writtenLocaleIds.put(dir, id);
361                 }
362             }
363         }
364 
365         System.out.println("processing alias ldml files");
366         for (IcuLocaleDir dir : splitDirs) {
367             Path outDir = baseDir.resolve(dir.getOutputDir());
368             Set<String> targetIds = config.getTargetLocaleIds(dir);
369             DependencyGraph depGraph = graphMetadata.get(dir);
370 
371             // TODO: Maybe calculate alias map directly into the dependency graph?
372             Map<String, String> aliasMap = getAliasMap(targetIds, dir);
373             aliasMap.forEach((s, t) -> {
374                 depGraph.addAlias(s, t);
375                 writeAliasFile(s, t, outDir);
376                 // It's only important to record which alias files are written because of forced
377                 // aliases, but since it's harmless otherwise, we just do it unconditionally.
378                 // Normal alias files don't affect the empty file calculation, but forced ones can.
379                 writtenLocaleIds.put(dir, s);
380             });
381 
382             calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values())
383                 .forEach(id -> writeEmptyFile(id, outDir, aliasMap.values()));
384 
385             writeDependencyGraph(outDir, depGraph);
386         }
387     }
388 
389 
390     private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%");
391 
392     // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type
393     // annotations (e.g. "languages:intvector"). We strip these when considering the element name.
getBaseSegmentName(String segment)394     private static String getBaseSegmentName(String segment) {
395         int idx = PATH_MODIFIER.indexIn(segment);
396         return idx == -1 ? segment : segment.substring(0, idx);
397     }
398 
399     /*
400      * There are four reasons for treating a locale ID as an alias.
401      * 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS").
402      * 2: It has no CLDR data but is missing a script subtag.
403      * 3: It is one of the special "phantom" alias which cannot be represented normally
404      *    and must be manually mapped (e.g. legacy locale IDs which don't even parse).
405      * 4: It is a "super special" forced alias, which might replace existing aliases in
406      *    some output directories.
407      */
getAliasMap(Set<String> localeIds, IcuLocaleDir dir)408     private Map<String, String> getAliasMap(Set<String> localeIds, IcuLocaleDir dir) {
409         // Even forced aliases only apply if they are in the set of locale IDs for the directory.
410         Map<String, String> forcedAliases =
411             Maps.filterKeys(config.getForcedAliases(dir), localeIds::contains);
412 
413         Map<String, String> aliasMap = new LinkedHashMap<>();
414         for (String id : localeIds) {
415             if (forcedAliases.containsKey(id)) {
416                 // Forced aliases will be added later and don't need to be processed here. This
417                 // is especially necessary if the ID is not structurally valid (e.g. "no_NO_NY")
418                 // since that cannot be processed by the code below.
419                 continue;
420             }
421             String canonicalId = supplementalData.replaceDeprecatedTags(id);
422             if (!canonicalId.equals(id)) {
423                 // If the canonical form of an ID differs from the requested ID, the this is an
424                 // alias, and just needs to point to the canonical ID.
425                 aliasMap.put(id, canonicalId);
426                 continue;
427             }
428             if (availableIds.contains(id)) {
429                 // If it's canonical and supported, it's not an alias.
430                 continue;
431             }
432             // If the requested locale is not supported, maximize it and alias to that.
433             String maximizedId = supplementalData.maximize(id)
434                 .orElseThrow(() -> new IllegalArgumentException("unsupported locale ID: " + id));
435             // We can't alias to ourselves and we shouldn't be here is the ID was already maximal.
436             checkArgument(!maximizedId.equals(id), "unsupported maximized locale ID: %s", id);
437             aliasMap.put(id, maximizedId);
438         }
439         // Important that we overwrite entries which might already exist here, since we might have
440         // already calculated a "natural" alias for something that we want to force (and we should
441         // replace the existing target, since that affects how we determine empty files later).
442         aliasMap.putAll(forcedAliases);
443         return aliasMap;
444     }
445 
446     /*
447      * Helper to determine the correct parent ID to be written into the ICU data file. The rules
448      * are:
449      * 1: If no forced parent exists (common) write the explicit parent (if that exists)
450      * 2: If a forced parent exists, but the forced value is what you would get by just truncating
451      *    the current locale ID, write nothing (ICU libraries truncate when no parent is set).
452      * 3: Write the forced parent (this is an exceptional case, and may not even occur in data).
453      */
getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir)454     private Optional<String> getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir) {
455         String forcedParentId = config.getForcedParents(dir).get(id);
456         if (forcedParentId == null) {
457             return parent;
458         }
459         return id.contains("_") && forcedParentId.regionMatches(0, id, 0, id.lastIndexOf('_'))
460             ? Optional.empty() : Optional.of(forcedParentId);
461     }
462 
processSupplemental()463     private void processSupplemental() {
464         for (OutputType type : config.getOutputTypes()) {
465             if (type.getCldrType() == LDML) {
466                 continue;
467             }
468             System.out.println("processing supplemental type " + type);
469             switch (type) {
470             case DAY_PERIODS:
471                 write(DayPeriodsMapper.process(src), "misc");
472                 break;
473 
474             case GENDER_LIST:
475                 processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false);
476                 break;
477 
478             case LIKELY_SUBTAGS:
479                 processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false);
480                 break;
481 
482             case SUPPLEMENTAL_DATA:
483                 processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true);
484                 break;
485 
486             case UNITS:
487                 processSupplemental("units", UNITS_DATA_PATHS, "misc", true);
488                 break;
489 
490             case CURRENCY_DATA:
491                 processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", false);
492                 break;
493 
494             case GRAMMATICAL_FEATURES:
495                 processSupplemental("grammaticalFeatures", GRAMMATICAL_FEATURES_PATHS, "misc", false);
496                 break;
497 
498             case METADATA:
499                 processSupplemental("metadata", METADATA_PATHS, "misc", false);
500                 break;
501 
502             case META_ZONES:
503                 processSupplemental("metaZones", METAZONE_PATHS, "misc", false);
504                 break;
505 
506             case NUMBERING_SYSTEMS:
507                 processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false);
508                 break;
509 
510             case PLURALS:
511                 write(PluralsMapper.process(src), "misc");
512                 break;
513 
514             case PLURAL_RANGES:
515                 write(PluralRangesMapper.process(src), "misc");
516                 break;
517 
518             case LOCALE_DISTANCE:
519                 write(LocaleDistanceMapper.process(src), "misc");
520                 break;
521 
522             case WINDOWS_ZONES:
523                 processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false);
524                 break;
525 
526             case TRANSFORMS:
527                 Path transformDir = createDirectory(config.getOutputDir().resolve("translit"));
528                 write(TransformsMapper.process(src, transformDir, fileHeader), transformDir, false);
529                 break;
530 
531             case VERSION:
532                 writeIcuVersionInfo();
533                 break;
534 
535             case KEY_TYPE_DATA:
536                 Bcp47Mapper.process(src).forEach(d -> write(d, "misc"));
537                 break;
538 
539             default:
540                 throw new AssertionError("Unsupported supplemental type: " + type);
541             }
542         }
543     }
544 
545     private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion");
546 
processSupplemental( String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion)547     private void processSupplemental(
548         String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) {
549         IcuData icuData =
550             SupplementalMapper.process(src, supplementalTransformer, label, paths);
551         // A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the
552         // supplemental data XML files.
553         if (addCldrVersion) {
554             // Not the same path as used by "setVersion()"
555             icuData.add(RB_CLDR_VERSION, config.getVersionInfo().getCldrVersion());
556         }
557         write(icuData, dir);
558     }
559 
writeAliasFile(String srcId, String destId, Path dir)560     private void writeAliasFile(String srcId, String destId, Path dir) {
561         IcuData icuData = new IcuData(srcId, true);
562         icuData.add(RB_ALIAS, destId);
563         // Allow overwrite for aliases since some are "forced" and overwrite existing targets.
564         // TODO: Maybe tighten this up so only forced aliases for existing targets are overwritten.
565         write(icuData, dir, true);
566     }
567 
writeEmptyFile(String id, Path dir, Collection<String> aliasTargets)568     private void writeEmptyFile(String id, Path dir, Collection<String> aliasTargets) {
569         IcuData icuData = new IcuData(id, true);
570         // TODO: Document the reason for this (i.e. why does it matter what goes into empty files?)
571         if (aliasTargets.contains(id)) {
572             icuData.setFileComment("generated alias target");
573             icuData.add(RB_EMPTY_ALIAS, "");
574         } else {
575             // These empty files only exist because the target of an alias has a parent locale
576             // which is itself not in the set of written ICU files. An "indirect alias target".
577             // No need to add data: Just write a resource bundle with an empty top-level table.
578         }
579         write(icuData, dir, false);
580     }
581 
writeIcuVersionInfo()582     private void writeIcuVersionInfo() {
583         IcuVersionInfo versionInfo = config.getVersionInfo();
584         IcuData versionData = new IcuData("icuver", false);
585         versionData.add(RbPath.of("ICUVersion"), versionInfo.getIcuVersion());
586         versionData.add(RbPath.of("DataVersion"), versionInfo.getIcuDataVersion());
587         versionData.add(RbPath.of("CLDRVersion"), versionInfo.getCldrVersion());
588         // Write file via non-helper methods since we need to include a legacy copyright.
589         Path miscDir = config.getOutputDir().resolve("misc");
590         createDirectory(miscDir);
591         ImmutableList<String> versionHeader = ImmutableList.<String>builder()
592             .addAll(fileHeader)
593             .add(
594                 "***************************************************************************",
595                 "*",
596                 "* Copyright (C) 2010-2016 International Business Machines",
597                 "* Corporation and others.  All Rights Reserved.",
598                 "*",
599                 "***************************************************************************")
600             .build();
601         IcuTextWriter.writeToFile(versionData, miscDir, versionHeader, false);
602     }
603 
604     // Commonest case for writing data files in "normal" directories.
write(IcuData icuData, String dir)605     private void write(IcuData icuData, String dir) {
606         write(icuData, config.getOutputDir().resolve(dir), false);
607     }
608 
write(IcuData icuData, Path dir, boolean allowOverwrite)609     private void write(IcuData icuData, Path dir, boolean allowOverwrite) {
610         createDirectory(dir);
611         IcuTextWriter.writeToFile(icuData, dir, fileHeader, allowOverwrite);
612     }
613 
createDirectory(Path dir)614     private Path createDirectory(Path dir) {
615         try {
616             Files.createDirectories(dir);
617         } catch (IOException e) {
618             throw new RuntimeException("cannot create directory: " + dir, e);
619         }
620         return dir;
621     }
622 
writeDependencyGraph(Path dir, DependencyGraph depGraph)623     private void writeDependencyGraph(Path dir, DependencyGraph depGraph) {
624         createDirectory(dir);
625         try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8);
626             PrintWriter out = new PrintWriter(w)) {
627             depGraph.writeJsonTo(out, fileHeader);
628             out.flush();
629         } catch (IOException e) {
630             throw new RuntimeException("cannot write dependency graph file: " + dir, e);
631         }
632     }
633 
634     // The set of IDs to process is:
635     // * any file that was written
636     // * any alias target (not written)
637     //
638     // From which we generate the complete "closure" under the "getParent()" function. This set
639     // contains all file (written or not) which need to exist to complete the locale hierarchy.
640     //
641     // Then we remove all the written files to just leave the ones that need to be generated.
642     // This is a simple and robust approach that handles things like "gaps" in non-aliased
643     // locale IDs, where an intermediate parent is not present.
calculateEmptyFiles( Set<String> writtenIds, Collection<String> aliasTargetIds)644     private ImmutableSet<String> calculateEmptyFiles(
645         Set<String> writtenIds, Collection<String> aliasTargetIds) {
646 
647         Set<String> seedIds = new HashSet<>(writtenIds);
648         seedIds.addAll(aliasTargetIds);
649         // Be nice and sort the output (makes easier debugging).
650         Set<String> allIds = new TreeSet<>();
651         for (String id : seedIds) {
652             while (!id.equals("root") && !allIds.contains(id)) {
653                 allIds.add(id);
654                 id = supplementalData.getParent(id);
655             }
656         }
657         return ImmutableSet.copyOf(Sets.difference(allIds, writtenIds));
658     }
659 
660     private static final ImmutableMap<String, IcuLocaleDir> LOCALE_SPLIT_INFO =
661         ImmutableMap.<String, IcuLocaleDir>builder()
662             // BRKITR
663             .put("boundaries", BRKITR)
664             .put("dictionaries", BRKITR)
665             .put("exceptions", BRKITR)
666             .put("extensions", BRKITR)
667             .put("lstm", BRKITR)
668             // COLL
669             .put("collations", COLL)
670             .put("depends", COLL)
671             .put("UCARules", COLL)
672             // CURR
673             .put("Currencies", CURR)
674             .put("CurrencyPlurals", CURR)
675             .put("CurrencyUnitPatterns", CURR)
676             .put("currencySpacing", CURR)
677             // LANG
678             .put("Keys", LANG)
679             .put("Languages", LANG)
680             .put("Scripts", LANG)
681             .put("Types", LANG)
682             .put("Variants", LANG)
683             .put("characterLabelPattern", LANG)
684             .put("codePatterns", LANG)
685             .put("localeDisplayPattern", LANG)
686             // RBNF
687             .put("RBNFRules", RBNF)
688             // REGION
689             .put("Countries", REGION)
690             // UNIT
691             .put("durationUnits", UNIT)
692             .put("units", UNIT)
693             .put("unitsShort", UNIT)
694             .put("unitsNarrow", UNIT)
695             // ZONE
696             .put("zoneStrings", ZONE)
697             .build();
698 }
699