• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu.ant;
4 
5 import static com.google.common.base.CharMatcher.inRange;
6 import static com.google.common.base.CharMatcher.is;
7 import static com.google.common.base.CharMatcher.whitespace;
8 import static com.google.common.base.Preconditions.checkArgument;
9 import static com.google.common.base.Preconditions.checkNotNull;
10 import static com.google.common.base.Preconditions.checkState;
11 import static com.google.common.collect.ImmutableList.toImmutableList;
12 import static com.google.common.collect.ImmutableMap.toImmutableMap;
13 import static com.google.common.collect.ImmutableTable.toImmutableTable;
14 import static com.google.common.collect.Tables.immutableCell;
15 import static java.util.stream.Collectors.joining;
16 import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
17 
18 import java.nio.file.Path;
19 import java.nio.file.Paths;
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.List;
23 import java.util.Set;
24 import java.util.function.Predicate;
25 import java.util.regex.Pattern;
26 import java.util.stream.Collectors;
27 
28 import org.apache.tools.ant.BuildException;
29 import org.apache.tools.ant.Task;
30 import org.unicode.cldr.api.CldrDataSupplier;
31 import org.unicode.cldr.api.CldrDraftStatus;
32 import org.unicode.cldr.api.CldrPath;
33 import org.unicode.icu.tool.cldrtoicu.AlternateLocaleData;
34 import org.unicode.icu.tool.cldrtoicu.IcuConverterConfig;
35 import org.unicode.icu.tool.cldrtoicu.LdmlConverter;
36 import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
37 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
38 import org.unicode.icu.tool.cldrtoicu.PseudoLocales;
39 import org.unicode.icu.tool.cldrtoicu.SupplementalData;
40 
41 import com.google.common.base.Ascii;
42 import com.google.common.base.CaseFormat;
43 import com.google.common.base.CharMatcher;
44 import com.google.common.base.Splitter;
45 import com.google.common.collect.HashMultimap;
46 import com.google.common.collect.ImmutableList;
47 import com.google.common.collect.ImmutableMap;
48 import com.google.common.collect.ImmutableSet;
49 import com.google.common.collect.ImmutableTable;
50 import com.google.common.collect.Iterables;
51 import com.google.common.collect.SetMultimap;
52 import com.google.common.collect.Sets;
53 import com.google.common.collect.Table.Cell;
54 
55 // Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
56 public final class ConvertIcuDataTask extends Task {
57     private static final Splitter LIST_SPLITTER =
58         Splitter.on(CharMatcher.anyOf(",\n")).trimResults(whitespace()).omitEmptyStrings();
59 
60     private static final CharMatcher DIGIT_OR_UNDERSCORE = inRange('0', '9').or(is('_'));
61     private static final CharMatcher UPPER_UNDERSCORE = inRange('A', 'Z').or(DIGIT_OR_UNDERSCORE);
62     private static final CharMatcher LOWER_UNDERSCORE = inRange('a', 'z').or(DIGIT_OR_UNDERSCORE);
63     private static final CharMatcher VALID_ENUM_CHAR = LOWER_UNDERSCORE.or(UPPER_UNDERSCORE);
64 
65     private Path cldrPath;
66     private CldrDraftStatus minimumDraftStatus;
67     // Set of default locale ID specifiers (wildcard IDs which are expanded).
68     private LocaleIds localeIds = null;
69     // Per directory overrides (fully specified locale IDs).
70     private final SetMultimap<IcuLocaleDir, String> perDirectoryIds = HashMultimap.create();
71     private final SetMultimap<IcuLocaleDir, String> inheritLanguageSubtag = HashMultimap.create();
72     private final IcuConverterConfig.Builder config = IcuConverterConfig.builder();
73     // Don't try and resolve actual paths until inside the execute method.
74     private final List<AltPath> altPaths = new ArrayList<>();
75     // TODO(CLDR-13381): Move into CLDR API; e.g. withPseudoLocales()
76     private boolean includePseudoLocales = false;
77     private Predicate<String> idFilter = id -> true;
78 
79     @SuppressWarnings("unused")
setOutputDir(String path)80     public void setOutputDir(String path) {
81         // Use String here since on some systems Ant doesn't support automatically converting Path instances.
82         config.setOutputDir(Paths.get(path));
83     }
84 
85     @SuppressWarnings("unused")
setCldrDir(String path)86     public void setCldrDir(String path) {
87         // Use String here since on some systems Ant doesn't support automatically converting Path instances.
88         this.cldrPath = checkNotNull(Paths.get(path));
89     }
90 
91     @SuppressWarnings("unused")
setIcuVersion(String icuVersion)92     public void setIcuVersion(String icuVersion) {
93         config.setIcuVersion(icuVersion);
94     }
95 
96     @SuppressWarnings("unused")
setIcuDataVersion(String icuDataVersion)97     public void setIcuDataVersion(String icuDataVersion) {
98         config.setIcuDataVersion(icuDataVersion);
99     }
100 
101     @SuppressWarnings("unused")
setCldrVersion(String cldrVersion)102     public void setCldrVersion(String cldrVersion) {
103         config.setCldrVersion(cldrVersion);
104     }
105 
106     @SuppressWarnings("unused")
setMinimalDraftStatus(String status)107     public void setMinimalDraftStatus(String status) {
108         minimumDraftStatus = resolve(CldrDraftStatus.class, status);
109     }
110 
111     @SuppressWarnings("unused")
setOutputTypes(String types)112     public void setOutputTypes(String types) {
113         ImmutableList<OutputType> typeList =
114             LIST_SPLITTER
115                 .splitToList(types).stream()
116                 .map(s -> resolve(OutputType.class, s))
117                 .collect(toImmutableList());
118         if (!typeList.isEmpty()) {
119             config.setOutputTypes(typeList);
120         }
121     }
122 
123     @SuppressWarnings("unused")
setSpecialsDir(String path)124     public void setSpecialsDir(String path) {
125         // Use String here since on some systems Ant doesn't support automatically converting Path instances.
126         config.setSpecialsDir(Paths.get(path));
127     }
128 
129     @SuppressWarnings("unused")
setIncludePseudoLocales(boolean includePseudoLocales)130     public void setIncludePseudoLocales(boolean includePseudoLocales) {
131         this.includePseudoLocales = includePseudoLocales;
132     }
133 
134     @SuppressWarnings("unused")
setLocaleIdFilter(String idFilterRegex)135     public void setLocaleIdFilter(String idFilterRegex) {
136         this.idFilter = Pattern.compile(idFilterRegex).asPredicate();
137     }
138 
139     @SuppressWarnings("unused")
setEmitReport(boolean emit)140     public void setEmitReport(boolean emit) {
141         config.setEmitReport(emit);
142     }
143 
144     public static final class LocaleIds extends Task {
145         private ImmutableSet<String> ids;
146 
147         @SuppressWarnings("unused")
addText(String localeIds)148         public void addText(String localeIds) {
149             this.ids = parseLocaleIds(localeIds);
150         }
151 
152         @Override
init()153         public void init() throws BuildException {
154             checkBuild(!ids.isEmpty(), "Locale IDs must be specified");
155         }
156     }
157 
158     public static final class Directory extends Task {
159         private IcuLocaleDir dir;
160         private ImmutableSet<String> inheritLanguageSubtag = ImmutableSet.of();
161         private final List<ForcedAlias> forcedAliases = new ArrayList<>();
162         private LocaleIds localeIds = null;
163 
164         @SuppressWarnings("unused")
setDir(String directory)165         public void setDir(String directory) {
166             this.dir = resolve(IcuLocaleDir.class, directory);
167         }
168 
169         @SuppressWarnings("unused")
setInheritLanguageSubtag(String localeIds)170         public void setInheritLanguageSubtag(String localeIds) {
171             this.inheritLanguageSubtag = parseLocaleIds(localeIds);
172         }
173 
174         @SuppressWarnings("unused")
addConfiguredForcedAlias(ForcedAlias alias)175         public void addConfiguredForcedAlias(ForcedAlias alias) {
176             forcedAliases.add(alias);
177         }
178 
179         @SuppressWarnings("unused")
addConfiguredLocaleIds(LocaleIds localeIds)180         public void addConfiguredLocaleIds(LocaleIds localeIds) {
181             checkBuild(this.localeIds == null,
182                 "Cannot add more that one <localeIds> element for <directory>: %s", dir);
183             this.localeIds =  localeIds;
184         }
185 
186         @Override
init()187         public void init() throws BuildException {
188             checkBuild(dir != null, "Directory attribute 'dir' must be specified");
189             checkBuild(localeIds != null, "<localeIds> must be specified for <directory>: %s", dir);
190         }
191     }
192 
193     public static final class ForcedAlias extends Task {
194         private String source = "";
195         private String target = "";
196 
197         @SuppressWarnings("unused")
setSource(String source)198         public void setSource(String source) {
199             this.source = whitespace().trimFrom(source);
200         }
201 
202         @SuppressWarnings("unused")
setTarget(String target)203         public void setTarget(String target) {
204             this.target = whitespace().trimFrom(target);
205         }
206 
207         @Override
init()208         public void init() throws BuildException {
209             checkBuild(!source.isEmpty(), "Alias source must not be empty");
210             checkBuild(!target.isEmpty(), "Alias target must not be empty");
211         }
212     }
213 
214     public static final class AltPath extends Task {
215         private String source = "";
216         private String target = "";
217         private ImmutableSet<String> localeIds = ImmutableSet.of();
218 
219         @SuppressWarnings("unused")
setTarget(String target)220         public void setTarget(String target) {
221             this.target = target.replace('\'', '"');
222         }
223 
224         @SuppressWarnings("unused")
setSource(String source)225         public void setSource(String source) {
226             this.source = source.replace('\'', '"');
227         }
228 
229         @SuppressWarnings("unused")
setLocales(String localeIds)230         public void setLocales(String localeIds) {
231             this.localeIds = parseLocaleIds(localeIds);
232         }
233 
234         @Override
init()235         public void init() throws BuildException {
236             checkBuild(!source.isEmpty(), "Source path not be empty");
237             checkBuild(!target.isEmpty(), "Target path not be empty");
238         }
239     }
240 
241     @SuppressWarnings("unused")
addConfiguredLocaleIds(LocaleIds localeIds)242     public void addConfiguredLocaleIds(LocaleIds localeIds) {
243         checkBuild(this.localeIds == null, "Cannot add more that one <localeIds> element");
244         this.localeIds =  localeIds;
245     }
246 
247     @SuppressWarnings("unused")
addConfiguredDirectory(Directory filter)248     public void addConfiguredDirectory(Directory filter) {
249         checkState(!perDirectoryIds.containsKey(filter.dir),
250             "directory %s specified twice", filter.dir);
251         ImmutableSet<String> ids = filter.localeIds.ids;
252         perDirectoryIds.putAll(filter.dir, ids);
253 
254         // Check that any locale IDs marked to inherit the base language (instead of root) are
255         // listed in the set of generated locales.
256         inheritLanguageSubtag.putAll(filter.dir, filter.inheritLanguageSubtag);
257         if (!ids.containsAll(filter.inheritLanguageSubtag)) {
258             log(String.format(
259                 "WARNING: Locale IDs listed in 'inheritLanguageSubtag' should also be listed "
260                     + "in <localeIds> for that directory (%s): %s",
261                 filter.dir, String.join(", ", Sets.difference(filter.inheritLanguageSubtag, ids))));
262             perDirectoryIds.putAll(filter.dir, filter.inheritLanguageSubtag);
263         }
264 
265         // Check that locales specified for forced aliases in this directory are also listed in
266         // the set of generated locales.
267         filter.forcedAliases.forEach(a -> config.addForcedAlias(filter.dir, a.source, a.target));
268         Set<String> sourceIds =
269             filter.forcedAliases.stream().map(a -> a.source).collect(Collectors.toSet());
270         if (!ids.containsAll(sourceIds)) {
271             Set<String> missingIds = Sets.difference(sourceIds, ids);
272             log(String.format(
273                 "WARNING: Locale IDs listed as sources of a <forcedAlias> should also be listed "
274                     + "in <localeIds> for that directory (%s): %s",
275                 filter.dir, String.join(", ", missingIds)));
276             perDirectoryIds.putAll(filter.dir, missingIds);
277         }
278         Set<String> targetIds =
279             filter.forcedAliases.stream().map(a -> a.target).collect(Collectors.toSet());
280         if (!ids.containsAll(targetIds)) {
281             Set<String> missingIds = Sets.difference(targetIds, ids);
282             log(String.format(
283                 "WARNING: Locale IDs listed as targets of a <forcedAlias> should also be listed "
284                     + "in <localeIds> for that directory (%s): %s",
285                 filter.dir, String.join(", ", missingIds)));
286             perDirectoryIds.putAll(filter.dir, missingIds);
287         }
288     }
289 
290     // Aliases on the outside are applied to all directories.
291     @SuppressWarnings("unused")
addConfiguredForcedAlias(ForcedAlias alias)292     public void addConfiguredForcedAlias(ForcedAlias alias) {
293         for (IcuLocaleDir dir : IcuLocaleDir.values()) {
294             config.addForcedAlias(dir, alias.source, alias.target);
295         }
296     }
297 
298     @SuppressWarnings("unused")
addConfiguredAltPath(AltPath altPath)299     public void addConfiguredAltPath(AltPath altPath) {
300         // Don't convert to CldrPath here (it triggers a bunch of CLDR data loading for the DTDs).
301         // Wait until the "execute()" method since in future we expect to use the configured CLDR
302         // directory explicitly there.
303         altPaths.add(altPath);
304     }
305 
306     @SuppressWarnings("unused")
execute()307     public void execute() throws BuildException {
308         checkBuild(localeIds != null, "<localeIds> must be specified");
309 
310         CldrDataSupplier src = CldrDataSupplier
311             .forCldrFilesIn(cldrPath)
312             .withDraftStatusAtLeast(minimumDraftStatus);
313 
314         // We must do this wrapping of the data supplier _before_ creating the supplemental data
315         // instance since adding pseudo locales affects the set of available locales.
316         // TODO: Move some/all of this into the base converter and control it via the config.
317         if (!altPaths.isEmpty()) {
318             src = AlternateLocaleData.transform(src, getGlobalAltPaths(), getLocaleAltPaths());
319         }
320         if (includePseudoLocales) {
321             src = PseudoLocales.addPseudoLocalesTo(src);
322         }
323 
324         SupplementalData supplementalData = SupplementalData.create(src);
325         ImmutableSet<String> defaultTargetIds =
326             LocaleIdResolver.expandTargetIds(this.localeIds.ids, supplementalData);
327         for (IcuLocaleDir dir : IcuLocaleDir.values()) {
328             Iterable<String> ids = perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds);
329             config.addLocaleIds(dir, Iterables.filter(ids, idFilter::test));
330 
331             // We should only have locale IDs like "zh_Hant" here (language + script) and only
332             // those which would naturally inherit to "root"
333             inheritLanguageSubtag.get(dir).forEach(id -> {
334                 checkArgument(id.matches("[a-z]{2}_[A-Z][a-z]{3}"),
335                     "Invalid locale ID for inheritLanguageSubtag (expect '<lang>_<Script>'): ", id);
336                 checkArgument(supplementalData.getParent(id).equals("root"),
337                     "Invalid locale ID for inheritLanguageSubtag (parent must be 'root'): ", id);
338                 config.addForcedParent(dir, id, id.substring(0, 2));
339             });
340         }
341         config.setMinimumDraftStatus(minimumDraftStatus);
342         LdmlConverter.convert(src, supplementalData, config.build());
343     }
344 
getGlobalAltPaths()345     private ImmutableMap<CldrPath, CldrPath> getGlobalAltPaths() {
346         // This fails if the same key appears more than once.
347         return altPaths.stream()
348             .filter(a -> a.localeIds.isEmpty())
349             .collect(toImmutableMap(
350                 a -> parseDistinguishingPath(a.target),
351                 a -> parseDistinguishingPath(a.source)));
352     }
353 
getLocaleAltPaths()354     private ImmutableTable<String, CldrPath, CldrPath> getLocaleAltPaths() {
355         return altPaths.stream()
356             .flatMap(
357                 a -> a.localeIds.stream().map(
358                     id -> immutableCell(
359                         id,
360                         parseDistinguishingPath(a.target),
361                         parseDistinguishingPath(a.source))))
362             // Weirdly there's no collector method to just collect cells.
363             .collect(toImmutableTable(Cell::getRowKey, Cell::getColumnKey, Cell::getValue));
364     }
365 
checkBuild(boolean condition, String message, Object... args)366     private static void checkBuild(boolean condition, String message, Object... args) {
367         if (!condition) {
368             throw new BuildException(String.format(message, args));
369         }
370     }
371 
parseLocaleIds(String localeIds)372     private static ImmutableSet<String> parseLocaleIds(String localeIds) {
373         // Need to filter out '//' style end-of-line comments first (replace with \n to avoid
374         // inadvertantly joining two elements.
375         localeIds = localeIds.replaceAll("//[^\n]*\n", "\n");
376         return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds));
377     }
378 
resolve(Class<T> enumClass, String name)379     private static <T extends Enum<T>> T resolve(Class<T> enumClass, String name) {
380         checkArgument(!name.isEmpty(), "enumeration name cannot be empty");
381         checkArgument(VALID_ENUM_CHAR.matchesAllOf(name),
382             "invalid enumeration name '%s'; expected only ASCII letters or '_'", name);
383         CaseFormat format;
384         if (UPPER_UNDERSCORE.matchesAllOf(name)) {
385             format = CaseFormat.UPPER_UNDERSCORE;
386         } else if (LOWER_UNDERSCORE.matchesAllOf(name)) {
387             format = CaseFormat.LOWER_UNDERSCORE;
388         } else {
389             // Mixed case with '_' is not permitted.
390             checkArgument(!name.contains("_"),
391                 "invalid enumeration name '%s'; mixed case with underscore not allowed: %s", name);
392             format =
393                 Ascii.isLowerCase(name.charAt(0)) ? CaseFormat.LOWER_CAMEL : CaseFormat.UPPER_CAMEL;
394         }
395         try {
396             return Enum.valueOf(enumClass, format.to(CaseFormat.UPPER_UNDERSCORE, name));
397         } catch (IllegalArgumentException e) {
398             String validNames =
399                 Arrays.stream(enumClass.getEnumConstants())
400                     .map(Object::toString)
401                     .collect(joining(", "));
402             throw new IllegalArgumentException(
403                 "invalid enumeration name " + name + "; expected one of; " + validNames);
404         }
405     }
406 }
407