• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu.ant;
4 
5 import static com.google.common.base.CharMatcher.inRange;
6 import static com.google.common.base.CharMatcher.is;
7 import static com.google.common.base.CharMatcher.whitespace;
8 import static com.google.common.base.Preconditions.checkArgument;
9 import static com.google.common.base.Preconditions.checkNotNull;
10 import static com.google.common.base.Preconditions.checkState;
11 import static com.google.common.collect.ImmutableList.toImmutableList;
12 import static com.google.common.collect.ImmutableMap.toImmutableMap;
13 import static com.google.common.collect.ImmutableTable.toImmutableTable;
14 import static com.google.common.collect.Tables.immutableCell;
15 import static java.util.stream.Collectors.joining;
16 import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
17 
18 import java.nio.file.Path;
19 import java.nio.file.Paths;
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.List;
23 import java.util.Set;
24 import java.util.function.Predicate;
25 import java.util.regex.Pattern;
26 import java.util.stream.Collectors;
27 
28 import org.apache.tools.ant.BuildException;
29 import org.apache.tools.ant.Task;
30 import org.unicode.cldr.api.CldrDataSupplier;
31 import org.unicode.cldr.api.CldrDraftStatus;
32 import org.unicode.cldr.api.CldrPath;
33 import org.unicode.cldr.util.CLDRConfig;
34 import org.unicode.icu.tool.cldrtoicu.AlternateLocaleData;
35 import org.unicode.icu.tool.cldrtoicu.IcuConverterConfig;
36 import org.unicode.icu.tool.cldrtoicu.LdmlConverter;
37 import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
38 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
39 import org.unicode.icu.tool.cldrtoicu.PseudoLocales;
40 import org.unicode.icu.tool.cldrtoicu.SupplementalData;
41 
42 import com.google.common.base.Ascii;
43 import com.google.common.base.CaseFormat;
44 import com.google.common.base.CharMatcher;
45 import com.google.common.base.Splitter;
46 import com.google.common.collect.HashMultimap;
47 import com.google.common.collect.ImmutableList;
48 import com.google.common.collect.ImmutableMap;
49 import com.google.common.collect.ImmutableSet;
50 import com.google.common.collect.ImmutableTable;
51 import com.google.common.collect.Iterables;
52 import com.google.common.collect.SetMultimap;
53 import com.google.common.collect.Sets;
54 import com.google.common.collect.Table.Cell;
55 
56 // Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
57 public final class ConvertIcuDataTask extends Task {
58     private static final Splitter LIST_SPLITTER =
59         Splitter.on(CharMatcher.anyOf(",\n")).trimResults(whitespace()).omitEmptyStrings();
60 
61     private static final CharMatcher DIGIT_OR_UNDERSCORE = inRange('0', '9').or(is('_'));
62     private static final CharMatcher UPPER_UNDERSCORE = inRange('A', 'Z').or(DIGIT_OR_UNDERSCORE);
63     private static final CharMatcher LOWER_UNDERSCORE = inRange('a', 'z').or(DIGIT_OR_UNDERSCORE);
64     private static final CharMatcher VALID_ENUM_CHAR = LOWER_UNDERSCORE.or(UPPER_UNDERSCORE);
65 
66     private Path cldrPath;
67     private CldrDraftStatus minimumDraftStatus;
68     // Set of default locale ID specifiers (wildcard IDs which are expanded).
69     private LocaleIds localeIds = null;
70     // Per directory overrides (fully specified locale IDs).
71     private final SetMultimap<IcuLocaleDir, String> perDirectoryIds = HashMultimap.create();
72     private final SetMultimap<IcuLocaleDir, String> inheritLanguageSubtag = HashMultimap.create();
73     private final IcuConverterConfig.Builder config = IcuConverterConfig.builder();
74     // Don't try and resolve actual paths until inside the execute method.
75     private final List<AltPath> altPaths = new ArrayList<>();
76     // TODO(CLDR-13381): Move into CLDR API; e.g. withPseudoLocales()
77     private boolean includePseudoLocales = false;
78     private Predicate<String> idFilter = id -> true;
79 
80     @SuppressWarnings("unused")
setOutputDir(String path)81     public void setOutputDir(String path) {
82         // Use String here since on some systems Ant doesn't support automatically converting Path instances.
83         config.setOutputDir(Paths.get(path));
84     }
85 
86     @SuppressWarnings("unused")
setCldrDir(String path)87     public void setCldrDir(String path) {
88         // Use String here since on some systems Ant doesn't support automatically converting Path instances.
89         this.cldrPath = checkNotNull(Paths.get(path));
90     }
91 
92     @SuppressWarnings("unused")
setIcuVersion(String icuVersion)93     public void setIcuVersion(String icuVersion) {
94         config.setIcuVersion(icuVersion);
95     }
96 
97     @SuppressWarnings("unused")
setIcuDataVersion(String icuDataVersion)98     public void setIcuDataVersion(String icuDataVersion) {
99         config.setIcuDataVersion(icuDataVersion);
100     }
101 
102     @SuppressWarnings("unused")
setCldrVersion(String cldrVersion)103     public void setCldrVersion(String cldrVersion) {
104         config.setCldrVersion(cldrVersion);
105     }
106 
107     @SuppressWarnings("unused")
setMinimalDraftStatus(String status)108     public void setMinimalDraftStatus(String status) {
109         minimumDraftStatus = resolve(CldrDraftStatus.class, status);
110     }
111 
112     @SuppressWarnings("unused")
setOutputTypes(String types)113     public void setOutputTypes(String types) {
114         ImmutableList<OutputType> typeList =
115             LIST_SPLITTER
116                 .splitToList(types).stream()
117                 .map(s -> resolve(OutputType.class, s))
118                 .collect(toImmutableList());
119         if (!typeList.isEmpty()) {
120             config.setOutputTypes(typeList);
121         }
122     }
123 
124     @SuppressWarnings("unused")
setSpecialsDir(String path)125     public void setSpecialsDir(String path) {
126         // Use String here since on some systems Ant doesn't support automatically converting Path instances.
127         config.setSpecialsDir(Paths.get(path));
128     }
129 
130     @SuppressWarnings("unused")
setIncludePseudoLocales(boolean includePseudoLocales)131     public void setIncludePseudoLocales(boolean includePseudoLocales) {
132         this.includePseudoLocales = includePseudoLocales;
133     }
134 
135     @SuppressWarnings("unused")
setLocaleIdFilter(String idFilterRegex)136     public void setLocaleIdFilter(String idFilterRegex) {
137         this.idFilter = Pattern.compile(idFilterRegex).asPredicate();
138     }
139 
140     @SuppressWarnings("unused")
setEmitReport(boolean emit)141     public void setEmitReport(boolean emit) {
142         config.setEmitReport(emit);
143     }
144 
145     public static final class LocaleIds extends Task {
146         private ImmutableSet<String> ids;
147 
148         @SuppressWarnings("unused")
addText(String localeIds)149         public void addText(String localeIds) {
150             this.ids = parseLocaleIds(localeIds);
151         }
152 
153         @Override
init()154         public void init() throws BuildException {
155             checkBuild(!ids.isEmpty(), "Locale IDs must be specified");
156         }
157     }
158 
159     public static final class Directory extends Task {
160         private IcuLocaleDir dir;
161         private ImmutableSet<String> inheritLanguageSubtag = ImmutableSet.of();
162         private final List<ForcedAlias> forcedAliases = new ArrayList<>();
163         private LocaleIds localeIds = null;
164 
165         @SuppressWarnings("unused")
setDir(String directory)166         public void setDir(String directory) {
167             this.dir = resolve(IcuLocaleDir.class, directory);
168         }
169 
170         @SuppressWarnings("unused")
setInheritLanguageSubtag(String localeIds)171         public void setInheritLanguageSubtag(String localeIds) {
172             this.inheritLanguageSubtag = parseLocaleIds(localeIds);
173         }
174 
175         @SuppressWarnings("unused")
addConfiguredForcedAlias(ForcedAlias alias)176         public void addConfiguredForcedAlias(ForcedAlias alias) {
177             forcedAliases.add(alias);
178         }
179 
180         @SuppressWarnings("unused")
addConfiguredLocaleIds(LocaleIds localeIds)181         public void addConfiguredLocaleIds(LocaleIds localeIds) {
182             checkBuild(this.localeIds == null,
183                 "Cannot add more that one <localeIds> element for <directory>: %s", dir);
184             this.localeIds =  localeIds;
185         }
186 
187         @Override
init()188         public void init() throws BuildException {
189             checkBuild(dir != null, "Directory attribute 'dir' must be specified");
190             checkBuild(localeIds != null, "<localeIds> must be specified for <directory>: %s", dir);
191         }
192     }
193 
194     public static final class ForcedAlias extends Task {
195         private String source = "";
196         private String target = "";
197 
198         @SuppressWarnings("unused")
setSource(String source)199         public void setSource(String source) {
200             this.source = whitespace().trimFrom(source);
201         }
202 
203         @SuppressWarnings("unused")
setTarget(String target)204         public void setTarget(String target) {
205             this.target = whitespace().trimFrom(target);
206         }
207 
208         @Override
init()209         public void init() throws BuildException {
210             checkBuild(!source.isEmpty(), "Alias source must not be empty");
211             checkBuild(!target.isEmpty(), "Alias target must not be empty");
212         }
213     }
214 
215     public static final class AltPath extends Task {
216         private String source = "";
217         private String target = "";
218         private ImmutableSet<String> localeIds = ImmutableSet.of();
219 
220         @SuppressWarnings("unused")
setTarget(String target)221         public void setTarget(String target) {
222             this.target = target.replace('\'', '"');
223         }
224 
225         @SuppressWarnings("unused")
setSource(String source)226         public void setSource(String source) {
227             this.source = source.replace('\'', '"');
228         }
229 
230         @SuppressWarnings("unused")
setLocales(String localeIds)231         public void setLocales(String localeIds) {
232             this.localeIds = parseLocaleIds(localeIds);
233         }
234 
235         @Override
init()236         public void init() throws BuildException {
237             checkBuild(!source.isEmpty(), "Source path not be empty");
238             checkBuild(!target.isEmpty(), "Target path not be empty");
239         }
240     }
241 
242     @SuppressWarnings("unused")
addConfiguredLocaleIds(LocaleIds localeIds)243     public void addConfiguredLocaleIds(LocaleIds localeIds) {
244         checkBuild(this.localeIds == null, "Cannot add more that one <localeIds> element");
245         this.localeIds =  localeIds;
246     }
247 
248     @SuppressWarnings("unused")
addConfiguredDirectory(Directory filter)249     public void addConfiguredDirectory(Directory filter) {
250         checkState(!perDirectoryIds.containsKey(filter.dir),
251             "directory %s specified twice", filter.dir);
252         ImmutableSet<String> ids = filter.localeIds.ids;
253         perDirectoryIds.putAll(filter.dir, ids);
254 
255         // Check that any locale IDs marked to inherit the base language (instead of root) are
256         // listed in the set of generated locales.
257         inheritLanguageSubtag.putAll(filter.dir, filter.inheritLanguageSubtag);
258         if (!ids.containsAll(filter.inheritLanguageSubtag)) {
259             log(String.format(
260                 "WARNING: Locale IDs listed in 'inheritLanguageSubtag' should also be listed "
261                     + "in <localeIds> for that directory (%s): %s",
262                 filter.dir, String.join(", ", Sets.difference(filter.inheritLanguageSubtag, ids))));
263             perDirectoryIds.putAll(filter.dir, filter.inheritLanguageSubtag);
264         }
265 
266         // Check that locales specified for forced aliases in this directory are also listed in
267         // the set of generated locales.
268         filter.forcedAliases.forEach(a -> config.addForcedAlias(filter.dir, a.source, a.target));
269         Set<String> sourceIds =
270             filter.forcedAliases.stream().map(a -> a.source).collect(Collectors.toSet());
271         if (!ids.containsAll(sourceIds)) {
272             Set<String> missingIds = Sets.difference(sourceIds, ids);
273             log(String.format(
274                 "WARNING: Locale IDs listed as sources of a <forcedAlias> should also be listed "
275                     + "in <localeIds> for that directory (%s): %s",
276                 filter.dir, String.join(", ", missingIds)));
277             perDirectoryIds.putAll(filter.dir, missingIds);
278         }
279         Set<String> targetIds =
280             filter.forcedAliases.stream().map(a -> a.target).collect(Collectors.toSet());
281         if (!ids.containsAll(targetIds)) {
282             Set<String> missingIds = Sets.difference(targetIds, ids);
283             log(String.format(
284                 "WARNING: Locale IDs listed as targets of a <forcedAlias> should also be listed "
285                     + "in <localeIds> for that directory (%s): %s",
286                 filter.dir, String.join(", ", missingIds)));
287             perDirectoryIds.putAll(filter.dir, missingIds);
288         }
289     }
290 
291     // Aliases on the outside are applied to all directories.
292     @SuppressWarnings("unused")
addConfiguredForcedAlias(ForcedAlias alias)293     public void addConfiguredForcedAlias(ForcedAlias alias) {
294         for (IcuLocaleDir dir : IcuLocaleDir.values()) {
295             config.addForcedAlias(dir, alias.source, alias.target);
296         }
297     }
298 
299     @SuppressWarnings("unused")
addConfiguredAltPath(AltPath altPath)300     public void addConfiguredAltPath(AltPath altPath) {
301         // Don't convert to CldrPath here (it triggers a bunch of CLDR data loading for the DTDs).
302         // Wait until the "execute()" method since in future we expect to use the configured CLDR
303         // directory explicitly there.
304         altPaths.add(altPath);
305     }
306 
307     @SuppressWarnings("unused")
execute()308     public void execute() throws BuildException {
309         // Spin up CLDRConfig outside of other inner loops, to
310         // avoid static init problems seen in CLDR-14636
311         CLDRConfig.getInstance().getSupplementalDataInfo();
312 
313         checkBuild(localeIds != null, "<localeIds> must be specified");
314 
315         CldrDataSupplier src = CldrDataSupplier
316             .forCldrFilesIn(cldrPath)
317             .withDraftStatusAtLeast(minimumDraftStatus);
318 
319         // We must do this wrapping of the data supplier _before_ creating the supplemental data
320         // instance since adding pseudo locales affects the set of available locales.
321         // TODO: Move some/all of this into the base converter and control it via the config.
322         if (!altPaths.isEmpty()) {
323             src = AlternateLocaleData.transform(src, getGlobalAltPaths(), getLocaleAltPaths());
324         }
325         if (includePseudoLocales) {
326             src = PseudoLocales.addPseudoLocalesTo(src);
327         }
328 
329         SupplementalData supplementalData = SupplementalData.create(src);
330         ImmutableSet<String> defaultTargetIds =
331             LocaleIdResolver.expandTargetIds(this.localeIds.ids, supplementalData);
332         for (IcuLocaleDir dir : IcuLocaleDir.values()) {
333             Iterable<String> ids = perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds);
334             config.addLocaleIds(dir, Iterables.filter(ids, idFilter::test));
335 
336             // We should only have locale IDs like "zh_Hant" here (language + script) and only
337             // those which would naturally inherit to "root"
338             inheritLanguageSubtag.get(dir).forEach(id -> {
339                 checkArgument(id.matches("[a-z]{2}_[A-Z][a-z]{3}"),
340                     "Invalid locale ID for inheritLanguageSubtag (expect '<lang>_<Script>'): ", id);
341                 checkArgument(supplementalData.getParent(id).equals("root"),
342                     "Invalid locale ID for inheritLanguageSubtag (parent must be 'root'): ", id);
343                 config.addForcedParent(dir, id, id.substring(0, 2));
344             });
345         }
346         config.setMinimumDraftStatus(minimumDraftStatus);
347         LdmlConverter.convert(src, supplementalData, config.build());
348     }
349 
getGlobalAltPaths()350     private ImmutableMap<CldrPath, CldrPath> getGlobalAltPaths() {
351         // This fails if the same key appears more than once.
352         return altPaths.stream()
353             .filter(a -> a.localeIds.isEmpty())
354             .collect(toImmutableMap(
355                 a -> parseDistinguishingPath(a.target),
356                 a -> parseDistinguishingPath(a.source)));
357     }
358 
getLocaleAltPaths()359     private ImmutableTable<String, CldrPath, CldrPath> getLocaleAltPaths() {
360         return altPaths.stream()
361             .flatMap(
362                 a -> a.localeIds.stream().map(
363                     id -> immutableCell(
364                         id,
365                         parseDistinguishingPath(a.target),
366                         parseDistinguishingPath(a.source))))
367             // Weirdly there's no collector method to just collect cells.
368             .collect(toImmutableTable(Cell::getRowKey, Cell::getColumnKey, Cell::getValue));
369     }
370 
checkBuild(boolean condition, String message, Object... args)371     private static void checkBuild(boolean condition, String message, Object... args) {
372         if (!condition) {
373             throw new BuildException(String.format(message, args));
374         }
375     }
376 
parseLocaleIds(String localeIds)377     private static ImmutableSet<String> parseLocaleIds(String localeIds) {
378         // Need to filter out '//' style end-of-line comments first (replace with \n to avoid
379         // inadvertently joining two elements.
380         localeIds = localeIds.replaceAll("//[^\n]*\n", "\n");
381         return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds));
382     }
383 
resolve(Class<T> enumClass, String name)384     private static <T extends Enum<T>> T resolve(Class<T> enumClass, String name) {
385         checkArgument(!name.isEmpty(), "enumeration name cannot be empty");
386         checkArgument(VALID_ENUM_CHAR.matchesAllOf(name),
387             "invalid enumeration name '%s'; expected only ASCII letters or '_'", name);
388         CaseFormat format;
389         if (UPPER_UNDERSCORE.matchesAllOf(name)) {
390             format = CaseFormat.UPPER_UNDERSCORE;
391         } else if (LOWER_UNDERSCORE.matchesAllOf(name)) {
392             format = CaseFormat.LOWER_UNDERSCORE;
393         } else {
394             // Mixed case with '_' is not permitted.
395             checkArgument(!name.contains("_"),
396                 "invalid enumeration name '%s'; mixed case with underscore not allowed: %s", name);
397             format =
398                 Ascii.isLowerCase(name.charAt(0)) ? CaseFormat.LOWER_CAMEL : CaseFormat.UPPER_CAMEL;
399         }
400         try {
401             return Enum.valueOf(enumClass, format.to(CaseFormat.UPPER_UNDERSCORE, name));
402         } catch (IllegalArgumentException e) {
403             String validNames =
404                 Arrays.stream(enumClass.getEnumConstants())
405                     .map(Object::toString)
406                     .collect(joining(", "));
407             throw new IllegalArgumentException(
408                 "invalid enumeration name " + name + "; expected one of; " + validNames);
409         }
410     }
411 }
412