// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.ant;

import static com.google.common.base.CharMatcher.inRange;
import static com.google.common.base.CharMatcher.is;
import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableTable.toImmutableTable;
import static com.google.common.collect.Tables.immutableCell;
import static java.util.stream.Collectors.joining;
import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Task;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.cldr.api.CldrPath;
import org.unicode.icu.tool.cldrtoicu.AlternateLocaleData;
import org.unicode.icu.tool.cldrtoicu.IcuConverterConfig;
import org.unicode.icu.tool.cldrtoicu.LdmlConverter;
import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
import org.unicode.icu.tool.cldrtoicu.PseudoLocales;
import org.unicode.icu.tool.cldrtoicu.SupplementalData;

import com.google.common.base.Ascii;
import com.google.common.base.CaseFormat;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Iterables;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Sets;
import com.google.common.collect.Table.Cell;

/**
 * Ant task which collects converter configuration from build file attributes and nested
 * elements, and then runs {@link LdmlConverter#convert} in {@link #execute()}.
 *
 * <p>Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is
 * suppressed.
 */
public final class ConvertIcuDataTask extends Task {
    // Splits on commas and newlines, trimming whitespace and dropping empty elements.
    private static final Splitter LIST_SPLITTER =
        Splitter.on(CharMatcher.anyOf(",\n")).trimResults(whitespace()).omitEmptyStrings();

    // Matches a '//' style comment up to, but not including, the end of its line. The newline is
    // deliberately left in place so that the elements either side of it are still separated.
    private static final Pattern END_OF_LINE_COMMENT = Pattern.compile("//[^\n]*");

    // Matches "<lang>_<Script>" locale IDs such as "zh_Hant" (two letter language subtag only).
    private static final Pattern LANGUAGE_SCRIPT_ID = Pattern.compile("[a-z]{2}_[A-Z][a-z]{3}");

    private static final CharMatcher DIGIT_OR_UNDERSCORE = inRange('0', '9').or(is('_'));
    private static final CharMatcher UPPER_UNDERSCORE = inRange('A', 'Z').or(DIGIT_OR_UNDERSCORE);
    private static final CharMatcher LOWER_UNDERSCORE = inRange('a', 'z').or(DIGIT_OR_UNDERSCORE);
    private static final CharMatcher VALID_ENUM_CHAR = LOWER_UNDERSCORE.or(UPPER_UNDERSCORE);

    private Path cldrPath;
    private CldrDraftStatus minimumDraftStatus;
    // Set of default locale ID specifiers (wildcard IDs which are expanded).
    private LocaleIds localeIds = null;
    // Per directory overrides (fully specified locale IDs).
    private final SetMultimap<IcuLocaleDir, String> perDirectoryIds = HashMultimap.create();
    private final SetMultimap<IcuLocaleDir, String> inheritLanguageSubtag = HashMultimap.create();
    private final IcuConverterConfig.Builder config = IcuConverterConfig.builder();
    // Don't try and resolve actual paths until inside the execute method.
    private final List<AltPath> altPaths = new ArrayList<>();
    // TODO(CLDR-13381): Move into CLDR API; e.g. withPseudoLocales()
    private boolean includePseudoLocales = false;
    // Accepts every locale ID unless replaced via setLocaleIdFilter().
    private Predicate<String> idFilter = id -> true;

    /** Sets the converter's output directory. */
    @SuppressWarnings("unused")
    public void setOutputDir(String path) {
        // Use String here since on some systems Ant doesn't support automatically converting Path instances.
        config.setOutputDir(Paths.get(path));
    }

    /** Sets the directory from which CLDR files are read in {@link #execute()}. */
    @SuppressWarnings("unused")
    public void setCldrDir(String path) {
        // Use String here since on some systems Ant doesn't support automatically converting Path instances.
        this.cldrPath = checkNotNull(Paths.get(path));
    }

    /** Passes the ICU version string through to the converter configuration. */
    @SuppressWarnings("unused")
    public void setIcuVersion(String icuVersion) {
        config.setIcuVersion(icuVersion);
    }

    /** Passes the ICU data version string through to the converter configuration. */
    @SuppressWarnings("unused")
    public void setIcuDataVersion(String icuDataVersion) {
        config.setIcuDataVersion(icuDataVersion);
    }

    /** Passes the CLDR version string through to the converter configuration. */
    @SuppressWarnings("unused")
    public void setCldrVersion(String cldrVersion) {
        config.setCldrVersion(cldrVersion);
    }

    /**
     * Sets the minimum draft status, given as the name of a {@link CldrDraftStatus} value in any
     * of the spellings accepted by {@code resolve()}.
     */
    @SuppressWarnings("unused")
    public void setMinimalDraftStatus(String status) {
        minimumDraftStatus = resolve(CldrDraftStatus.class, status);
    }

    /**
     * Sets the output types from a comma or newline separated list of {@link OutputType} names.
     * An empty list leaves the configuration untouched.
     */
    @SuppressWarnings("unused")
    public void setOutputTypes(String types) {
        ImmutableList<OutputType> typeList =
            LIST_SPLITTER
                .splitToList(types).stream()
                .map(s -> resolve(OutputType.class, s))
                .collect(toImmutableList());
        if (!typeList.isEmpty()) {
            config.setOutputTypes(typeList);
        }
    }

    /** Sets the "specials" directory in the converter configuration. */
    @SuppressWarnings("unused")
    public void setSpecialsDir(String path) {
        // Use String here since on some systems Ant doesn't support automatically converting Path instances.
        config.setSpecialsDir(Paths.get(path));
    }

    /** Sets whether the CLDR data is wrapped to add pseudo locales in {@link #execute()}. */
    @SuppressWarnings("unused")
    public void setIncludePseudoLocales(boolean includePseudoLocales) {
        this.includePseudoLocales = includePseudoLocales;
    }

    /**
     * Sets a regular expression used to filter the locale IDs passed to the converter. The
     * predicate comes from {@link Pattern#asPredicate()}, so an ID is kept if the expression is
     * found anywhere within it.
     */
    @SuppressWarnings("unused")
    public void setLocaleIdFilter(String idFilterRegex) {
        this.idFilter = Pattern.compile(idFilterRegex).asPredicate();
    }

    /** Passes the "emit report" flag through to the converter configuration. */
    @SuppressWarnings("unused")
    public void setEmitReport(boolean emit) {
        config.setEmitReport(emit);
    }

    /** Nested {@code <localeIds>} element whose text content is a list of locale IDs. */
    public static final class LocaleIds extends Task {
        // Remains null until addText() has been called.
        private ImmutableSet<String> ids;

        /** Parses the element text into a set of IDs (see {@code parseLocaleIds()}). */
        @SuppressWarnings("unused")
        public void addText(String localeIds) {
            this.ids = parseLocaleIds(localeIds);
        }

        @Override
        public void init() throws BuildException {
            // 'ids' is null if no text was added, which must fail in the same way as an empty list.
            checkBuild(ids != null && !ids.isEmpty(), "Locale IDs must be specified");
        }
    }

    /** Nested {@code <directory>} element holding per-directory locale IDs and aliases. */
    public static final class Directory extends Task {
        private IcuLocaleDir dir;
        private ImmutableSet<String> inheritLanguageSubtag = ImmutableSet.of();
        private final List<ForcedAlias> forcedAliases = new ArrayList<>();
        private LocaleIds localeIds = null;

        /** Sets the directory, given as the name of an {@link IcuLocaleDir} value. */
        @SuppressWarnings("unused")
        public void setDir(String directory) {
            this.dir = resolve(IcuLocaleDir.class, directory);
        }

        /** Sets the locale IDs which are given their language subtag as a forced parent. */
        @SuppressWarnings("unused")
        public void setInheritLanguageSubtag(String localeIds) {
            this.inheritLanguageSubtag = parseLocaleIds(localeIds);
        }

        /** Adds a forced alias which applies only to this directory. */
        @SuppressWarnings("unused")
        public void addConfiguredForcedAlias(ForcedAlias alias) {
            forcedAliases.add(alias);
        }

        /** Sets the locale IDs for this directory; at most one such element is permitted. */
        @SuppressWarnings("unused")
        public void addConfiguredLocaleIds(LocaleIds localeIds) {
            checkBuild(this.localeIds == null,
                "Cannot add more than one <localeIds> element for <directory>: %s", dir);
            this.localeIds = localeIds;
        }

        @Override
        public void init() throws BuildException {
            checkBuild(dir != null, "Directory attribute 'dir' must be specified");
            checkBuild(localeIds != null, "<localeIds> must be specified for <directory>: %s", dir);
        }
    }

    /** Nested {@code <forcedAlias>} element with whitespace-trimmed source and target IDs. */
    public static final class ForcedAlias extends Task {
        private String source = "";
        private String target = "";

        @SuppressWarnings("unused")
        public void setSource(String source) {
            this.source = whitespace().trimFrom(source);
        }

        @SuppressWarnings("unused")
        public void setTarget(String target) {
            this.target = whitespace().trimFrom(target);
        }

        @Override
        public void init() throws BuildException {
            checkBuild(!source.isEmpty(), "Alias source must not be empty");
            checkBuild(!target.isEmpty(), "Alias target must not be empty");
        }
    }

    /**
     * Nested {@code <altPath>} element mapping a target path to a source path, optionally
     * restricted to a set of locale IDs. Single quotes in either path are converted to double
     * quotes before the path is stored.
     */
    public static final class AltPath extends Task {
        private String source = "";
        private String target = "";
        // Empty means the mapping is treated as global (see getGlobalAltPaths()).
        private ImmutableSet<String> localeIds = ImmutableSet.of();

        @SuppressWarnings("unused")
        public void setTarget(String target) {
            this.target = target.replace('\'', '"');
        }

        @SuppressWarnings("unused")
        public void setSource(String source) {
            this.source = source.replace('\'', '"');
        }

        @SuppressWarnings("unused")
        public void setLocales(String localeIds) {
            this.localeIds = parseLocaleIds(localeIds);
        }

        @Override
        public void init() throws BuildException {
            checkBuild(!source.isEmpty(), "Source path must not be empty");
            checkBuild(!target.isEmpty(), "Target path must not be empty");
        }
    }

    /** Sets the default locale IDs; at most one such element is permitted. */
    @SuppressWarnings("unused")
    public void addConfiguredLocaleIds(LocaleIds localeIds) {
        checkBuild(this.localeIds == null, "Cannot add more than one <localeIds> element");
        this.localeIds = localeIds;
    }

    /**
     * Records the locale IDs, inherited language subtags and forced aliases of a
     * {@code <directory>} element. Any ID referenced by {@code inheritLanguageSubtag} or by a
     * forced alias, but missing from the element's {@code <localeIds>}, is logged as a warning
     * and added to the directory's IDs.
     *
     * @throws BuildException if the element has no directory or no locale IDs
     * @throws IllegalStateException if the same directory is specified twice
     */
    @SuppressWarnings("unused")
    public void addConfiguredDirectory(Directory filter) {
        // Repeat the checks of Directory.init() here: this method dereferences these values
        // directly, and a null directory would otherwise be stored under a key that execute()
        // never looks up.
        // NOTE(review): whether Ant calls init() on nested elements is not visible in this file.
        checkBuild(filter.dir != null, "Directory attribute 'dir' must be specified");
        checkBuild(filter.localeIds != null,
            "<localeIds> must be specified for <directory>: %s", filter.dir);
        checkBuild(filter.localeIds.ids != null, "Locale IDs must be specified");
        checkState(!perDirectoryIds.containsKey(filter.dir),
            "directory %s specified twice", filter.dir);
        ImmutableSet<String> ids = filter.localeIds.ids;
        perDirectoryIds.putAll(filter.dir, ids);

        // Check that any locale IDs marked to inherit the base language (instead of root) are
        // listed in the set of generated locales.
        inheritLanguageSubtag.putAll(filter.dir, filter.inheritLanguageSubtag);
        addUnlistedIds(filter.dir, ids, filter.inheritLanguageSubtag, "in 'inheritLanguageSubtag'");

        // Check that locales specified for forced aliases in this directory are also listed in
        // the set of generated locales.
        filter.forcedAliases.forEach(a -> config.addForcedAlias(filter.dir, a.source, a.target));
        Set<String> sourceIds =
            filter.forcedAliases.stream().map(a -> a.source).collect(Collectors.toSet());
        addUnlistedIds(filter.dir, ids, sourceIds, "as sources of a <forcedAlias>");
        Set<String> targetIds =
            filter.forcedAliases.stream().map(a -> a.target).collect(Collectors.toSet());
        addUnlistedIds(filter.dir, ids, targetIds, "as targets of a <forcedAlias>");
    }

    // Logs a warning for, and adds to the directory's IDs, any required ID which was not listed.
    private void addUnlistedIds(
            IcuLocaleDir dir, Set<String> listedIds, Set<String> requiredIds, String origin) {
        if (listedIds.containsAll(requiredIds)) {
            return;
        }
        Set<String> missingIds = Sets.difference(requiredIds, listedIds);
        log(String.format(
            "WARNING: Locale IDs listed %s should also be listed "
                + "in <localeIds> for that directory (%s): %s",
            origin, dir, String.join(", ", missingIds)));
        perDirectoryIds.putAll(dir, missingIds);
    }

    // Aliases on the outside are applied to all directories.
    @SuppressWarnings("unused")
    public void addConfiguredForcedAlias(ForcedAlias alias) {
        for (IcuLocaleDir dir : IcuLocaleDir.values()) {
            config.addForcedAlias(dir, alias.source, alias.target);
        }
    }

    /** Stores an {@code <altPath>} element for resolution in {@link #execute()}. */
    @SuppressWarnings("unused")
    public void addConfiguredAltPath(AltPath altPath) {
        // Don't convert to CldrPath here (it triggers a bunch of CLDR data loading for the DTDs).
        // Wait until the "execute()" method since in future we expect to use the configured CLDR
        // directory explicitly there.
        altPaths.add(altPath);
    }

    /**
     * Builds the CLDR data supplier, finalizes the converter configuration and runs the
     * conversion.
     *
     * @throws BuildException if no {@code <localeIds>} element was specified
     * @throws IllegalArgumentException if an {@code inheritLanguageSubtag} ID is not of the form
     *     {@code <lang>_<Script>} or does not have "root" as its parent
     */
    @Override
    @SuppressWarnings("unused")
    public void execute() throws BuildException {
        checkBuild(localeIds != null, "<localeIds> must be specified");

        CldrDataSupplier src = CldrDataSupplier
            .forCldrFilesIn(cldrPath)
            .withDraftStatusAtLeast(minimumDraftStatus);

        // We must do this wrapping of the data supplier _before_ creating the supplemental data
        // instance since adding pseudo locales affects the set of available locales.
        // TODO: Move some/all of this into the base converter and control it via the config.
        if (!altPaths.isEmpty()) {
            src = AlternateLocaleData.transform(src, getGlobalAltPaths(), getLocaleAltPaths());
        }
        if (includePseudoLocales) {
            src = PseudoLocales.addPseudoLocalesTo(src);
        }

        SupplementalData supplementalData = SupplementalData.create(src);
        ImmutableSet<String> defaultTargetIds =
            LocaleIdResolver.expandTargetIds(this.localeIds.ids, supplementalData);
        for (IcuLocaleDir dir : IcuLocaleDir.values()) {
            // Directories without an explicit <directory> element use the expanded default IDs.
            Iterable<String> ids = perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds);
            config.addLocaleIds(dir, Iterables.filter(ids, idFilter::test));

            // We should only have locale IDs like "zh_Hant" here (language + script) and only
            // those which would naturally inherit to "root"
            inheritLanguageSubtag.get(dir).forEach(id -> {
                checkArgument(LANGUAGE_SCRIPT_ID.matcher(id).matches(),
                    "Invalid locale ID for inheritLanguageSubtag (expect '<lang>_<Script>'): %s",
                    id);
                checkArgument(supplementalData.getParent(id).equals("root"),
                    "Invalid locale ID for inheritLanguageSubtag (parent must be 'root'): %s", id);
                // The pattern above guarantees that the first two characters are the language.
                config.addForcedParent(dir, id, id.substring(0, 2));
            });
        }
        config.setMinimumDraftStatus(minimumDraftStatus);
        LdmlConverter.convert(src, supplementalData, config.build());
    }

    // Returns the target-to-source mapping of the alt paths which have no locale IDs.
    private ImmutableMap<CldrPath, CldrPath> getGlobalAltPaths() {
        // This fails if the same key appears more than once.
        return altPaths.stream()
            .filter(a -> a.localeIds.isEmpty())
            .collect(toImmutableMap(
                a -> parseDistinguishingPath(a.target),
                a -> parseDistinguishingPath(a.source)));
    }

    // Returns a (locale ID, target path) -> source path table of the locale specific alt paths.
    private ImmutableTable<String, CldrPath, CldrPath> getLocaleAltPaths() {
        return altPaths.stream()
            .flatMap(
                a -> a.localeIds.stream().map(
                    id -> immutableCell(
                        id,
                        parseDistinguishingPath(a.target),
                        parseDistinguishingPath(a.source))))
            // Weirdly there's no collector method to just collect cells.
            .collect(toImmutableTable(Cell::getRowKey, Cell::getColumnKey, Cell::getValue));
    }

    // Throws a BuildException with a String.format() style message if the condition is false.
    private static void checkBuild(boolean condition, String message, Object... args) {
        if (!condition) {
            throw new BuildException(String.format(message, args));
        }
    }

    // Parses a comma or newline separated list of locale IDs, ignoring '//' style comments.
    private static ImmutableSet<String> parseLocaleIds(String localeIds) {
        // Need to filter out '//' style end-of-line comments first. Only the comment itself is
        // removed (the newline is kept) to avoid inadvertently joining two elements, and so that
        // a comment on the final line is removed even when no newline follows it.
        localeIds = END_OF_LINE_COMMENT.matcher(localeIds).replaceAll("");
        return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds));
    }

    // Resolves an enum value from a name written as UPPER_UNDERSCORE, lower_underscore,
    // lowerCamel or UpperCamel. Throws IllegalArgumentException for names which are empty, use
    // other characters, mix case with underscores, or do not match any constant.
    private static <T extends Enum<T>> T resolve(Class<T> enumClass, String name) {
        checkArgument(!name.isEmpty(), "enumeration name cannot be empty");
        checkArgument(VALID_ENUM_CHAR.matchesAllOf(name),
            "invalid enumeration name '%s'; expected only ASCII letters, digits or '_'", name);
        CaseFormat format;
        if (UPPER_UNDERSCORE.matchesAllOf(name)) {
            format = CaseFormat.UPPER_UNDERSCORE;
        } else if (LOWER_UNDERSCORE.matchesAllOf(name)) {
            format = CaseFormat.LOWER_UNDERSCORE;
        } else {
            // Mixed case with '_' is not permitted.
            checkArgument(!name.contains("_"),
                "invalid enumeration name '%s'; mixed case with underscore not allowed", name);
            format =
                Ascii.isLowerCase(name.charAt(0)) ? CaseFormat.LOWER_CAMEL : CaseFormat.UPPER_CAMEL;
        }
        try {
            return Enum.valueOf(enumClass, format.to(CaseFormat.UPPER_UNDERSCORE, name));
        } catch (IllegalArgumentException e) {
            String validNames =
                Arrays.stream(enumClass.getEnumConstants())
                    .map(Object::toString)
                    .collect(joining(", "));
            // Keep the original exception as the cause so that no detail is lost.
            throw new IllegalArgumentException(
                "invalid enumeration name " + name + "; expected one of: " + validNames, e);
        }
    }
}