• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static com.google.common.base.Preconditions.checkNotNull;
7 import static com.google.common.collect.ImmutableList.toImmutableList;
8 import static java.nio.charset.StandardCharsets.UTF_8;
9 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
10 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
11 import static org.unicode.cldr.api.CldrDataType.BCP47;
12 import static org.unicode.cldr.api.CldrDataType.LDML;
13 import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
14 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.BRKITR;
15 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.COLL;
16 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.CURR;
17 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LANG;
18 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LOCALES;
19 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RBNF;
20 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.REGION;
21 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT;
22 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE;
23 
24 import java.io.BufferedWriter;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.InputStreamReader;
28 import java.io.PrintWriter;
29 import java.nio.file.Files;
30 import java.nio.file.Path;
31 import java.util.*;
32 import java.util.function.Predicate;
33 import java.util.stream.Collectors;
34 import java.util.stream.Stream;
35 
36 import org.unicode.cldr.api.CldrData;
37 import org.unicode.cldr.api.CldrDataSupplier;
38 import org.unicode.cldr.api.CldrDataType;
39 import org.unicode.cldr.api.CldrPath;
40 import org.unicode.cldr.api.PathMatcher;
41 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
42 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuVersionInfo;
43 import org.unicode.icu.tool.cldrtoicu.localedistance.LocaleDistanceMapper;
44 import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper;
45 import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper;
46 import org.unicode.icu.tool.cldrtoicu.mapper.CollationMapper;
47 import org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapper;
48 import org.unicode.icu.tool.cldrtoicu.mapper.LocaleMapper;
49 import org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapper;
50 import org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapper;
51 import org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapper;
52 import org.unicode.icu.tool.cldrtoicu.mapper.SupplementalMapper;
53 import org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapper;
54 import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer;
55 
56 import com.google.common.base.CharMatcher;
57 import com.google.common.collect.HashMultimap;
58 import com.google.common.collect.ImmutableList;
59 import com.google.common.collect.ImmutableListMultimap;
60 import com.google.common.collect.ImmutableMap;
61 import com.google.common.collect.ImmutableSet;
62 import com.google.common.collect.LinkedListMultimap;
63 import com.google.common.collect.ListMultimap;
64 import com.google.common.collect.Maps;
65 import com.google.common.collect.SetMultimap;
66 import com.google.common.collect.Sets;
67 import com.google.common.io.CharStreams;
68 
69 /**
70  * The main converter tool for CLDR to ICU data. To run this tool, you need to supply a suitable
71  * {@link LdmlConverterConfig} instance. There is a simple {@code main()} method available in this
72  * class which can be invoked passing just the desired output directory and which relies on the
73  * presence of several system properties for the remainder of its parameters:
74  * <ul>
75  *     <li>CLDR_DIR: The root of the CLDR release from which CLDR data is read.
76  *     <li>ICU_DIR: The root of the ICU release from which additional "specials" XML data is read.
77  *     <li>CLDR_DTD_CACHE: A temporary directory with the various DTDs cached (this is a legacy
78  *         requirement from the underlying CLDR libraries and might go away one day).
79  * </ul>
80  */
81 public final class LdmlConverter {
82     // TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath).
83     private static final Predicate<CldrPath> GENDER_LIST_PATHS =
84         supplementalMatcher("gender");
85     private static final Predicate<CldrPath> LIKELY_SUBTAGS_PATHS =
86         supplementalMatcher("likelySubtags");
87     private static final Predicate<CldrPath> METAZONE_PATHS =
88         supplementalMatcher("metaZones", "primaryZones");
89     private static final Predicate<CldrPath> METADATA_PATHS =
90         supplementalMatcher("metadata");
91     private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS =
92         supplementalMatcher(
93             "calendarData",
94             "calendarPreferenceData",
95             "codeMappings",
96             "codeMappingsCurrency",
97             "idValidity",
98             "languageData",
99             "languageMatching",
100             "measurementData",
101             "parentLocales",
102             "subdivisionContainment",
103             "territoryContainment",
104             "territoryInfo",
105             "timeData",
106             "weekData",
107             "weekOfPreference");
108     private static final Predicate<CldrPath> CURRENCY_DATA_PATHS =
109         supplementalMatcher("currencyData");
110     private static final Predicate<CldrPath> UNITS_DATA_PATHS =
111         supplementalMatcher(
112             "convertUnits",
113             "unitConstants",
114             "unitQuantities",
115             "unitPreferenceData");
116     private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS =
117         supplementalMatcher("numberingSystems");
118     private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS =
119         supplementalMatcher("windowsZones");
120 
supplementalMatcher(String... spec)121     private static Predicate<CldrPath> supplementalMatcher(String... spec) {
122         checkArgument(spec.length > 0, "must supply at least one matcher spec");
123         if (spec.length == 1) {
124             return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf;
125         }
126         return
127             Arrays.stream(spec)
128                 .map(s -> PathMatcher.of("//supplementalData/" + s))
129                 .map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf))
130                 .reduce(p -> false, Predicate::or);
131     }
132 
133     private static RbPath RB_PARENT = RbPath.of("%%Parent");
134     // The quotes below are only so we achieve parity with the manually written alias files.
135     // TODO: Remove unnecessary quotes once the migration to this code is complete.
136     private static RbPath RB_ALIAS = RbPath.of("\"%%ALIAS\"");
137     // Special path for adding to empty files which only exist to complete the parent chain.
138     // TODO: Confirm that this has no meaningful effect and unify "empty" file contents.
139     private static RbPath RB_EMPTY_ALIAS = RbPath.of("___");
140 
141     /**
142      * Output types defining specific subsets of the ICU data which can be converted separately.
143      * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to
144      * hide what are essentially implementation specific data splits.
145      */
146     public enum OutputType {
147         LOCALES(LDML),
148         BRKITR(LDML),
149         COLL(LDML),
150         RBNF(LDML),
151         DAY_PERIODS(SUPPLEMENTAL),
152         GENDER_LIST(SUPPLEMENTAL),
153         LIKELY_SUBTAGS(SUPPLEMENTAL),
154         SUPPLEMENTAL_DATA(SUPPLEMENTAL),
155         UNITS(SUPPLEMENTAL),
156         CURRENCY_DATA(SUPPLEMENTAL),
157         METADATA(SUPPLEMENTAL),
158         META_ZONES(SUPPLEMENTAL),
159         NUMBERING_SYSTEMS(SUPPLEMENTAL),
160         PLURALS(SUPPLEMENTAL),
161         PLURAL_RANGES(SUPPLEMENTAL),
162         WINDOWS_ZONES(SUPPLEMENTAL),
163         TRANSFORMS(SUPPLEMENTAL),
164         LOCALE_DISTANCE(SUPPLEMENTAL),
165         VERSION(SUPPLEMENTAL),
166         KEY_TYPE_DATA(BCP47);
167 
168         public static final ImmutableSet<OutputType> ALL = ImmutableSet.copyOf(OutputType.values());
169 
170         private final CldrDataType type;
171 
OutputType(CldrDataType type)172         OutputType(CldrDataType type) {
173             this.type = checkNotNull(type);
174         }
175 
getCldrType()176         CldrDataType getCldrType() {
177             return type;
178         }
179     }
180 
181     // Map to convert the rather arbitrarily defined "output types" to the directories into which
182     // the data is written. This is only for "LDML" types since other mappers don't need to split
183     // data into multiple directories.
184     private static final ImmutableListMultimap<OutputType, IcuLocaleDir> TYPE_TO_DIR =
185         ImmutableListMultimap.<OutputType, IcuLocaleDir>builder()
186             .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE)
187             .putAll(OutputType.BRKITR, BRKITR)
188             .putAll(OutputType.COLL, COLL)
189             .putAll(OutputType.RBNF, RBNF)
190             .build();
191 
192     /** Converts CLDR data according to the given configuration. */
convert( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)193     public static void convert(
194         CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
195         new LdmlConverter(src, supplementalData, config).convertAll();
196     }
197 
198     // The supplier for all data to be converted.
199     private final CldrDataSupplier src;
200     // Supplemental data available to mappers if needed.
201     private final SupplementalData supplementalData;
202     // The configuration controlling conversion behaviour.
203     private final LdmlConverterConfig config;
204     // The set of expanded target locale IDs.
205     // TODO: Make available IDs include specials files (or fail if specials are not available).
206     private final ImmutableSet<String> availableIds;
207     // Transformer for locale data.
208     private final PathValueTransformer localeTransformer;
209     // Transformer for supplemental data.
210     private final PathValueTransformer supplementalTransformer;
211     // Header string to go into every ICU data and transliteration rule file (comment prefixes
212     // are not present and must be added by the code writing the file).
213     private final ImmutableList<String> fileHeader;
214 
LdmlConverter( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)215     private LdmlConverter(
216         CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
217         this.src = checkNotNull(src);
218         this.supplementalData = checkNotNull(supplementalData);
219         this.config = checkNotNull(config);
220         this.availableIds = ImmutableSet.copyOf(
221             Sets.intersection(supplementalData.getAvailableLocaleIds(), config.getAllLocaleIds()));
222         // Load the remaining path value transformers.
223         this.supplementalTransformer =
224             RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_supplemental.txt"),
225                 IcuFunctions.ALGORITHM_FN,
226                 IcuFunctions.DATE_FN,
227                 IcuFunctions.DAY_NUMBER_FN,
228                 IcuFunctions.EXP_FN,
229                 IcuFunctions.YMD_FN);
230         this.localeTransformer =
231             RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_locale.txt"),
232                 IcuFunctions.CONTEXT_TRANSFORM_INDEX_FN);
233         this.fileHeader = readLinesFromResource("/ldml2icu_header.txt");
234     }
235 
convertAll()236     private void convertAll() {
237         processLdml();
238         processSupplemental();
239         if (config.emitReport()) {
240             System.out.println("Supplemental Data Transformer=" + supplementalTransformer);
241             System.out.println("Locale Data Transformer=" + localeTransformer);
242         }
243     }
244 
readLinesFromResource(String name)245     private static ImmutableList<String> readLinesFromResource(String name) {
246         try (InputStream in = LdmlConverter.class.getResourceAsStream(name)) {
247             return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8)));
248         } catch (IOException e) {
249             throw new RuntimeException("cannot read resource: " + name, e);
250         }
251     }
252 
loadSpecialsData(String localeId)253     private Optional<CldrData> loadSpecialsData(String localeId) {
254         String expected = localeId + ".xml";
255         try (Stream<Path> files = Files.walk(config.getSpecialsDir())) {
256             Set<Path> xmlFiles = files
257                 .filter(Files::isRegularFile)
258                 .filter(f -> f.getFileName().toString().equals(expected))
259                 .collect(Collectors.toSet());
260             return !xmlFiles.isEmpty()
261                 ? Optional.of(
262                 CldrDataSupplier.forCldrFiles(LDML, config.getMinimumDraftStatus(), xmlFiles))
263                 : Optional.empty();
264         } catch (IOException e) {
265             throw new RuntimeException(
266                 "error processing specials directory: " + config.getSpecialsDir(), e);
267         }
268     }
269 
processLdml()270     private void processLdml() {
271         ImmutableList<IcuLocaleDir> splitDirs =
272             config.getOutputTypes().stream()
273                 .filter(t -> t.getCldrType() == LDML)
274                 .flatMap(t -> TYPE_TO_DIR.get(t).stream())
275                 .collect(toImmutableList());
276         if (splitDirs.isEmpty()) {
277             return;
278         }
279 
280         String cldrVersion = config.getVersionInfo().getCldrVersion();
281 
282         Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>();
283         splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion)));
284 
285         SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create();
286         Path baseDir = config.getOutputDir();
287 
288         for (String id : config.getAllLocaleIds()) {
289             // Skip "target" IDs that are aliases (they are handled later).
290             if (!availableIds.contains(id)) {
291                 continue;
292             }
293             // TODO: Remove the following skip when ICU-20997 is fixed
294             if (id.contains("VALENCIA")) {
295                 System.out.println("(skipping " + id + " until ICU-20997 is fixed)");
296                 continue;
297             }
298 
299             IcuData icuData = new IcuData(id, true);
300 
301             Optional<CldrData> specials = loadSpecialsData(id);
302             CldrData unresolved = src.getDataForLocale(id, UNRESOLVED);
303 
304             BreakIteratorMapper.process(icuData, unresolved, specials);
305             CollationMapper.process(icuData, unresolved, specials, cldrVersion);
306             RbnfMapper.process(icuData, unresolved, specials);
307 
308             CldrData resolved = src.getDataForLocale(id, RESOLVED);
309             Optional<String> defaultCalendar = supplementalData.getDefaultCalendar(id);
310             LocaleMapper.process(
311                 icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar);
312 
313             ListMultimap<IcuLocaleDir, RbPath> splitPaths = LinkedListMultimap.create();
314             for (RbPath p : icuData.getPaths()) {
315                 String rootName = getBaseSegmentName(p.getSegment(0));
316                 splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p);
317             }
318 
319             Optional<String> parent = supplementalData.getExplicitParentLocaleOf(id);
320             // We always write base languages (even if empty).
321             boolean isBaseLanguage = !id.contains("_");
322             // Run through all directories (not just the keySet() of the split path map) since we
323             // sometimes write empty files.
324             for (IcuLocaleDir dir : splitDirs) {
325                 Set<String> targetIds = config.getTargetLocaleIds(dir);
326                 if (!targetIds.contains(id)) {
327                     if (!splitPaths.get(dir).isEmpty()) {
328                         System.out.format(
329                             "target IDs for %s does not contain %s, but it has data: %s\n",
330                             dir, id, splitPaths.get(dir));
331                     }
332                     continue;
333                 }
334 
335                 Path outDir = baseDir.resolve(dir.getOutputDir());
336                 IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback());
337 
338                 // The split data can still be empty for this directory, but that's expected (it
339                 // might only be written because it has an explicit parent added below).
340                 splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p)));
341 
342                 // If we add an explicit parent locale, it forces the data to be written. This is
343                 // where we check for forced overrides of the parent relationship (which is a per
344                 // directory thing).
345                 getIcuParent(id, parent, dir).ifPresent(p -> {
346                     splitData.add(RB_PARENT, p);
347                     graphMetadata.get(dir).addParent(id, p);
348                 });
349 
350                 if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) {
351                     if (id.equals("root")) {
352                         splitData.setVersion(cldrVersion);
353                     }
354                     write(splitData, outDir, false);
355                     writtenLocaleIds.put(dir, id);
356                 }
357             }
358         }
359 
360         for (IcuLocaleDir dir : splitDirs) {
361             Path outDir = baseDir.resolve(dir.getOutputDir());
362             Set<String> targetIds = config.getTargetLocaleIds(dir);
363             DependencyGraph depGraph = graphMetadata.get(dir);
364 
365             // TODO: Maybe calculate alias map directly into the dependency graph?
366             Map<String, String> aliasMap = getAliasMap(targetIds, dir);
367             aliasMap.forEach((s, t) -> {
368                 depGraph.addAlias(s, t);
369                 writeAliasFile(s, t, outDir);
370                 // It's only important to record which alias files are written because of forced
371                 // aliases, but since it's harmless otherwise, we just do it unconditionally.
372                 // Normal alias files don't affect the empty file calculation, but forced ones can.
373                 writtenLocaleIds.put(dir, s);
374             });
375 
376             calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values())
377                 .forEach(id -> writeEmptyFile(id, outDir, aliasMap.values()));
378 
379             writeDependencyGraph(outDir, depGraph);
380         }
381     }
382 
383 
384     private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%");
385 
386     // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type
387     // annotations (e.g. "languages:intvector"). We strip these when considering the element name.
getBaseSegmentName(String segment)388     private static String getBaseSegmentName(String segment) {
389         int idx = PATH_MODIFIER.indexIn(segment);
390         return idx == -1 ? segment : segment.substring(0, idx);
391     }
392 
393     /*
394      * There are four reasons for treating a locale ID as an alias.
395      * 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS").
396      * 2: It has no CLDR data but is missing a script subtag.
397      * 3: It is one of the special "phantom" alias which cannot be represented normally
398      *    and must be manually mapped (e.g. legacy locale IDs which don't even parse).
399      * 4: It is a "super special" forced alias, which might replace existing aliases in
400      *    some output directories.
401      */
getAliasMap(Set<String> localeIds, IcuLocaleDir dir)402     private Map<String, String> getAliasMap(Set<String> localeIds, IcuLocaleDir dir) {
403         // Even forced aliases only apply if they are in the set of locale IDs for the directory.
404         Map<String, String> forcedAliases =
405             Maps.filterKeys(config.getForcedAliases(dir), localeIds::contains);
406 
407         Map<String, String> aliasMap = new LinkedHashMap<>();
408         for (String id : localeIds) {
409             if (forcedAliases.containsKey(id)) {
410                 // Forced aliases will be added later and don't need to be processed here. This
411                 // is especially necessary if the ID is not structurally valid (e.g. "no_NO_NY")
412                 // since that cannot be processed by the code below.
413                 continue;
414             }
415             String canonicalId = supplementalData.replaceDeprecatedTags(id);
416             if (!canonicalId.equals(id)) {
417                 // If the canonical form of an ID differs from the requested ID, the this is an
418                 // alias, and just needs to point to the canonical ID.
419                 aliasMap.put(id, canonicalId);
420                 continue;
421             }
422             if (availableIds.contains(id)) {
423                 // If it's canonical and supported, it's not an alias.
424                 continue;
425             }
426             // If the requested locale is not supported, maximize it and alias to that.
427             String maximizedId = supplementalData.maximize(id)
428                 .orElseThrow(() -> new IllegalArgumentException("unsupported locale ID: " + id));
429             // We can't alias to ourselves and we shouldn't be here is the ID was already maximal.
430             checkArgument(!maximizedId.equals(id), "unsupported maximized locale ID: %s", id);
431             aliasMap.put(id, maximizedId);
432         }
433         // Important that we overwrite entries which might already exist here, since we might have
434         // already calculated a "natural" alias for something that we want to force (and we should
435         // replace the existing target, since that affects how we determine empty files later).
436         aliasMap.putAll(forcedAliases);
437         return aliasMap;
438     }
439 
440     /*
441      * Helper to determine the correct parent ID to be written into the ICU data file. The rules
442      * are:
443      * 1: If no forced parent exists (common) write the explicit parent (if that exists)
444      * 2: If a forced parent exists, but the forced value is what you would get by just truncating
445      *    the current locale ID, write nothing (ICU libraries truncate when no parent is set).
446      * 3: Write the forced parent (this is an exceptional case, and may not even occur in data).
447      */
getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir)448     private Optional<String> getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir) {
449         String forcedParentId = config.getForcedParents(dir).get(id);
450         if (forcedParentId == null) {
451             return parent;
452         }
453         return id.contains("_") && forcedParentId.regionMatches(0, id, 0, id.lastIndexOf('_'))
454             ? Optional.empty() : Optional.of(forcedParentId);
455     }
456 
processSupplemental()457     private void processSupplemental() {
458         for (OutputType type : config.getOutputTypes()) {
459             if (type.getCldrType() == LDML) {
460                 continue;
461             }
462             switch (type) {
463             case DAY_PERIODS:
464                 write(DayPeriodsMapper.process(src), "misc");
465                 break;
466 
467             case GENDER_LIST:
468                 processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false);
469                 break;
470 
471             case LIKELY_SUBTAGS:
472                 processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false);
473                 break;
474 
475             case SUPPLEMENTAL_DATA:
476                 processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true);
477                 break;
478 
479             case UNITS:
480                 processSupplemental("units", UNITS_DATA_PATHS, "misc", true);
481                 break;
482 
483             case CURRENCY_DATA:
484                 processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", false);
485                 break;
486 
487             case METADATA:
488                 processSupplemental("metadata", METADATA_PATHS, "misc", false);
489                 break;
490 
491             case META_ZONES:
492                 processSupplemental("metaZones", METAZONE_PATHS, "misc", false);
493                 break;
494 
495             case NUMBERING_SYSTEMS:
496                 processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false);
497                 break;
498 
499             case PLURALS:
500                 write(PluralsMapper.process(src), "misc");
501                 break;
502 
503             case PLURAL_RANGES:
504                 write(PluralRangesMapper.process(src), "misc");
505                 break;
506 
507             case LOCALE_DISTANCE:
508                 write(LocaleDistanceMapper.process(src), "misc");
509                 break;
510 
511             case WINDOWS_ZONES:
512                 processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false);
513                 break;
514 
515             case TRANSFORMS:
516                 Path transformDir = createDirectory(config.getOutputDir().resolve("translit"));
517                 write(TransformsMapper.process(src, transformDir, fileHeader), transformDir, false);
518                 break;
519 
520             case VERSION:
521                 writeIcuVersionInfo();
522                 break;
523 
524             case KEY_TYPE_DATA:
525                 Bcp47Mapper.process(src).forEach(d -> write(d, "misc"));
526                 break;
527 
528             default:
529                 throw new AssertionError("Unsupported supplemental type: " + type);
530             }
531         }
532     }
533 
534     private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion");
535 
processSupplemental( String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion)536     private void processSupplemental(
537         String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) {
538         IcuData icuData =
539             SupplementalMapper.process(src, supplementalTransformer, label, paths);
540         // A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the
541         // supplemental data XML files.
542         if (addCldrVersion) {
543             // Not the same path as used by "setVersion()"
544             icuData.add(RB_CLDR_VERSION, config.getVersionInfo().getCldrVersion());
545         }
546         write(icuData, dir);
547     }
548 
writeAliasFile(String srcId, String destId, Path dir)549     private void writeAliasFile(String srcId, String destId, Path dir) {
550         IcuData icuData = new IcuData(srcId, true);
551         icuData.add(RB_ALIAS, destId);
552         // Allow overwrite for aliases since some are "forced" and overwrite existing targets.
553         // TODO: Maybe tighten this up so only forced aliases for existing targets are overwritten.
554         write(icuData, dir, true);
555     }
556 
writeEmptyFile(String id, Path dir, Collection<String> aliasTargets)557     private void writeEmptyFile(String id, Path dir, Collection<String> aliasTargets) {
558         IcuData icuData = new IcuData(id, true);
559         // TODO: Document the reason for this (i.e. why does it matter what goes into empty files?)
560         if (aliasTargets.contains(id)) {
561             icuData.setFileComment("generated alias target");
562             icuData.add(RB_EMPTY_ALIAS, "");
563         } else {
564             // These empty files only exist because the target of an alias has a parent locale
565             // which is itself not in the set of written ICU files. An "indirect alias target".
566             // No need to add data: Just write a resource bundle with an empty top-level table.
567         }
568         write(icuData, dir, false);
569     }
570 
writeIcuVersionInfo()571     private void writeIcuVersionInfo() {
572         IcuVersionInfo versionInfo = config.getVersionInfo();
573         IcuData versionData = new IcuData("icuver", false);
574         versionData.add(RbPath.of("ICUVersion"), versionInfo.getIcuVersion());
575         versionData.add(RbPath.of("DataVersion"), versionInfo.getIcuDataVersion());
576         versionData.add(RbPath.of("CLDRVersion"), versionInfo.getCldrVersion());
577         // Write file via non-helper methods since we need to include a legacy copyright.
578         Path miscDir = config.getOutputDir().resolve("misc");
579         createDirectory(miscDir);
580         ImmutableList<String> versionHeader = ImmutableList.<String>builder()
581             .addAll(fileHeader)
582             .add(
583                 "***************************************************************************",
584                 "*",
585                 "* Copyright (C) 2010-2016 International Business Machines",
586                 "* Corporation and others.  All Rights Reserved.",
587                 "*",
588                 "***************************************************************************")
589             .build();
590         IcuTextWriter.writeToFile(versionData, miscDir, versionHeader, false);
591     }
592 
593     // Commonest case for writing data files in "normal" directories.
write(IcuData icuData, String dir)594     private void write(IcuData icuData, String dir) {
595         write(icuData, config.getOutputDir().resolve(dir), false);
596     }
597 
write(IcuData icuData, Path dir, boolean allowOverwrite)598     private void write(IcuData icuData, Path dir, boolean allowOverwrite) {
599         createDirectory(dir);
600         IcuTextWriter.writeToFile(icuData, dir, fileHeader, allowOverwrite);
601     }
602 
createDirectory(Path dir)603     private Path createDirectory(Path dir) {
604         try {
605             Files.createDirectories(dir);
606         } catch (IOException e) {
607             throw new RuntimeException("cannot create directory: " + dir, e);
608         }
609         return dir;
610     }
611 
writeDependencyGraph(Path dir, DependencyGraph depGraph)612     private void writeDependencyGraph(Path dir, DependencyGraph depGraph) {
613         createDirectory(dir);
614         try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8);
615             PrintWriter out = new PrintWriter(w)) {
616             depGraph.writeJsonTo(out, fileHeader);
617             out.flush();
618         } catch (IOException e) {
619             throw new RuntimeException("cannot write dependency graph file: " + dir, e);
620         }
621     }
622 
623     // The set of IDs to process is:
624     // * any file that was written
625     // * any alias target (not written)
626     //
627     // From which we generate the complete "closure" under the "getParent()" function. This set
628     // contains all file (written or not) which need to exist to complete the locale hierarchy.
629     //
630     // Then we remove all the written files to just leave the ones that need to be generated.
631     // This is a simple and robust approach that handles things like "gaps" in non-aliased
632     // locale IDs, where an intermediate parent is not present.
calculateEmptyFiles( Set<String> writtenIds, Collection<String> aliasTargetIds)633     private ImmutableSet<String> calculateEmptyFiles(
634         Set<String> writtenIds, Collection<String> aliasTargetIds) {
635 
636         Set<String> seedIds = new HashSet<>(writtenIds);
637         seedIds.addAll(aliasTargetIds);
638         // Be nice and sort the output (makes easier debugging).
639         Set<String> allIds = new TreeSet<>();
640         for (String id : seedIds) {
641             while (!id.equals("root") && !allIds.contains(id)) {
642                 allIds.add(id);
643                 id = supplementalData.getParent(id);
644             }
645         }
646         return ImmutableSet.copyOf(Sets.difference(allIds, writtenIds));
647     }
648 
649     private static final ImmutableMap<String, IcuLocaleDir> LOCALE_SPLIT_INFO =
650         ImmutableMap.<String, IcuLocaleDir>builder()
651             // BRKITR
652             .put("boundaries", BRKITR)
653             .put("dictionaries", BRKITR)
654             .put("exceptions", BRKITR)
655             // COLL
656             .put("collations", COLL)
657             .put("depends", COLL)
658             .put("UCARules", COLL)
659             // CURR
660             .put("Currencies", CURR)
661             .put("CurrencyPlurals", CURR)
662             .put("CurrencyUnitPatterns", CURR)
663             .put("currencySpacing", CURR)
664             // LANG
665             .put("Keys", LANG)
666             .put("Languages", LANG)
667             .put("Scripts", LANG)
668             .put("Types", LANG)
669             .put("Variants", LANG)
670             .put("characterLabelPattern", LANG)
671             .put("codePatterns", LANG)
672             .put("localeDisplayPattern", LANG)
673             // RBNF
674             .put("RBNFRules", RBNF)
675             // REGION
676             .put("Countries", REGION)
677             // UNIT
678             .put("durationUnits", UNIT)
679             .put("units", UNIT)
680             .put("unitsShort", UNIT)
681             .put("unitsNarrow", UNIT)
682             // ZONE
683             .put("zoneStrings", ZONE)
684             .build();
685 }
686