1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 package org.unicode.icu.tool.cldrtoicu.ant; 4 5 import static com.google.common.base.CharMatcher.inRange; 6 import static com.google.common.base.CharMatcher.is; 7 import static com.google.common.base.CharMatcher.whitespace; 8 import static com.google.common.base.Preconditions.checkArgument; 9 import static com.google.common.base.Preconditions.checkNotNull; 10 import static com.google.common.base.Preconditions.checkState; 11 import static com.google.common.collect.ImmutableList.toImmutableList; 12 import static com.google.common.collect.ImmutableMap.toImmutableMap; 13 import static com.google.common.collect.ImmutableTable.toImmutableTable; 14 import static com.google.common.collect.Tables.immutableCell; 15 import static java.util.stream.Collectors.joining; 16 import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath; 17 18 import java.nio.file.Path; 19 import java.nio.file.Paths; 20 import java.util.ArrayList; 21 import java.util.Arrays; 22 import java.util.List; 23 import java.util.Set; 24 import java.util.function.Predicate; 25 import java.util.regex.Pattern; 26 import java.util.stream.Collectors; 27 28 import org.apache.tools.ant.BuildException; 29 import org.apache.tools.ant.Task; 30 import org.unicode.cldr.api.CldrDataSupplier; 31 import org.unicode.cldr.api.CldrDraftStatus; 32 import org.unicode.cldr.api.CldrPath; 33 import org.unicode.cldr.util.CLDRConfig; 34 import org.unicode.icu.tool.cldrtoicu.AlternateLocaleData; 35 import org.unicode.icu.tool.cldrtoicu.IcuConverterConfig; 36 import org.unicode.icu.tool.cldrtoicu.LdmlConverter; 37 import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType; 38 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir; 39 import org.unicode.icu.tool.cldrtoicu.PseudoLocales; 40 import org.unicode.icu.tool.cldrtoicu.SupplementalData; 41 42 import com.google.common.base.Ascii; 43 import com.google.common.base.CaseFormat; 44 import com.google.common.base.CharMatcher; 45 import com.google.common.base.Splitter; 46 import com.google.common.collect.HashMultimap; 47 import com.google.common.collect.ImmutableList; 48 import com.google.common.collect.ImmutableMap; 49 import com.google.common.collect.ImmutableSet; 50 import com.google.common.collect.ImmutableTable; 51 import com.google.common.collect.Iterables; 52 import com.google.common.collect.SetMultimap; 53 import com.google.common.collect.Sets; 54 import com.google.common.collect.Table.Cell; 55 56 // Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed. 57 public final class ConvertIcuDataTask extends Task { 58 private static final Splitter LIST_SPLITTER = 59 Splitter.on(CharMatcher.anyOf(",\n")).trimResults(whitespace()).omitEmptyStrings(); 60 61 private static final CharMatcher DIGIT_OR_UNDERSCORE = inRange('0', '9').or(is('_')); 62 private static final CharMatcher UPPER_UNDERSCORE = inRange('A', 'Z').or(DIGIT_OR_UNDERSCORE); 63 private static final CharMatcher LOWER_UNDERSCORE = inRange('a', 'z').or(DIGIT_OR_UNDERSCORE); 64 private static final CharMatcher VALID_ENUM_CHAR = LOWER_UNDERSCORE.or(UPPER_UNDERSCORE); 65 66 private Path cldrPath; 67 private CldrDraftStatus minimumDraftStatus; 68 // Set of default locale ID specifiers (wildcard IDs which are expanded). 69 private LocaleIds localeIds = null; 70 // Per directory overrides (fully specified locale IDs). 71 private final SetMultimap<IcuLocaleDir, String> perDirectoryIds = HashMultimap.create(); 72 private final SetMultimap<IcuLocaleDir, String> inheritLanguageSubtag = HashMultimap.create(); 73 private final IcuConverterConfig.Builder config = IcuConverterConfig.builder(); 74 // Don't try and resolve actual paths until inside the execute method. 75 private final List<AltPath> altPaths = new ArrayList<>(); 76 // TODO(CLDR-13381): Move into CLDR API; e.g. withPseudoLocales() 77 private boolean includePseudoLocales = false; 78 private Predicate<String> idFilter = id -> true; 79 80 @SuppressWarnings("unused") setOutputDir(String path)81 public void setOutputDir(String path) { 82 // Use String here since on some systems Ant doesn't support automatically converting Path instances. 83 config.setOutputDir(Paths.get(path)); 84 } 85 86 @SuppressWarnings("unused") setCldrDir(String path)87 public void setCldrDir(String path) { 88 // Use String here since on some systems Ant doesn't support automatically converting Path instances. 89 this.cldrPath = checkNotNull(Paths.get(path)); 90 } 91 92 @SuppressWarnings("unused") setIcuVersion(String icuVersion)93 public void setIcuVersion(String icuVersion) { 94 config.setIcuVersion(icuVersion); 95 } 96 97 @SuppressWarnings("unused") setIcuDataVersion(String icuDataVersion)98 public void setIcuDataVersion(String icuDataVersion) { 99 config.setIcuDataVersion(icuDataVersion); 100 } 101 102 @SuppressWarnings("unused") setCldrVersion(String cldrVersion)103 public void setCldrVersion(String cldrVersion) { 104 config.setCldrVersion(cldrVersion); 105 } 106 107 @SuppressWarnings("unused") setMinimalDraftStatus(String status)108 public void setMinimalDraftStatus(String status) { 109 minimumDraftStatus = resolve(CldrDraftStatus.class, status); 110 } 111 112 @SuppressWarnings("unused") setOutputTypes(String types)113 public void setOutputTypes(String types) { 114 ImmutableList<OutputType> typeList = 115 LIST_SPLITTER 116 .splitToList(types).stream() 117 .map(s -> resolve(OutputType.class, s)) 118 .collect(toImmutableList()); 119 if (!typeList.isEmpty()) { 120 config.setOutputTypes(typeList); 121 } 122 } 123 124 @SuppressWarnings("unused") setSpecialsDir(String path)125 public void setSpecialsDir(String path) { 126 // Use String here since on some systems Ant doesn't support automatically converting Path instances. 127 config.setSpecialsDir(Paths.get(path)); 128 } 129 130 @SuppressWarnings("unused") setIncludePseudoLocales(boolean includePseudoLocales)131 public void setIncludePseudoLocales(boolean includePseudoLocales) { 132 this.includePseudoLocales = includePseudoLocales; 133 } 134 135 @SuppressWarnings("unused") setLocaleIdFilter(String idFilterRegex)136 public void setLocaleIdFilter(String idFilterRegex) { 137 this.idFilter = Pattern.compile(idFilterRegex).asPredicate(); 138 } 139 140 @SuppressWarnings("unused") setEmitReport(boolean emit)141 public void setEmitReport(boolean emit) { 142 config.setEmitReport(emit); 143 } 144 145 public static final class LocaleIds extends Task { 146 private ImmutableSet<String> ids; 147 148 @SuppressWarnings("unused") addText(String localeIds)149 public void addText(String localeIds) { 150 this.ids = parseLocaleIds(localeIds); 151 } 152 153 @Override init()154 public void init() throws BuildException { 155 checkBuild(!ids.isEmpty(), "Locale IDs must be specified"); 156 } 157 } 158 159 public static final class Directory extends Task { 160 private IcuLocaleDir dir; 161 private ImmutableSet<String> inheritLanguageSubtag = ImmutableSet.of(); 162 private final List<ForcedAlias> forcedAliases = new ArrayList<>(); 163 private LocaleIds localeIds = null; 164 165 @SuppressWarnings("unused") setDir(String directory)166 public void setDir(String directory) { 167 this.dir = resolve(IcuLocaleDir.class, directory); 168 } 169 170 @SuppressWarnings("unused") setInheritLanguageSubtag(String localeIds)171 public void setInheritLanguageSubtag(String localeIds) { 172 this.inheritLanguageSubtag = parseLocaleIds(localeIds); 173 } 174 175 @SuppressWarnings("unused") addConfiguredForcedAlias(ForcedAlias alias)176 public void addConfiguredForcedAlias(ForcedAlias alias) { 177 forcedAliases.add(alias); 178 } 179 180 @SuppressWarnings("unused") addConfiguredLocaleIds(LocaleIds localeIds)181 public void addConfiguredLocaleIds(LocaleIds localeIds) { 182 checkBuild(this.localeIds == null, 183 "Cannot add more that one <localeIds> element for <directory>: %s", dir); 184 this.localeIds = localeIds; 185 } 186 187 @Override init()188 public void init() throws BuildException { 189 checkBuild(dir != null, "Directory attribute 'dir' must be specified"); 190 checkBuild(localeIds != null, "<localeIds> must be specified for <directory>: %s", dir); 191 } 192 } 193 194 public static final class ForcedAlias extends Task { 195 private String source = ""; 196 private String target = ""; 197 198 @SuppressWarnings("unused") setSource(String source)199 public void setSource(String source) { 200 this.source = whitespace().trimFrom(source); 201 } 202 203 @SuppressWarnings("unused") setTarget(String target)204 public void setTarget(String target) { 205 this.target = whitespace().trimFrom(target); 206 } 207 208 @Override init()209 public void init() throws BuildException { 210 checkBuild(!source.isEmpty(), "Alias source must not be empty"); 211 checkBuild(!target.isEmpty(), "Alias target must not be empty"); 212 } 213 } 214 215 public static final class AltPath extends Task { 216 private String source = ""; 217 private String target = ""; 218 private ImmutableSet<String> localeIds = ImmutableSet.of(); 219 220 @SuppressWarnings("unused") setTarget(String target)221 public void setTarget(String target) { 222 this.target = target.replace('\'', '"'); 223 } 224 225 @SuppressWarnings("unused") setSource(String source)226 public void setSource(String source) { 227 this.source = source.replace('\'', '"'); 228 } 229 230 @SuppressWarnings("unused") setLocales(String localeIds)231 public void setLocales(String localeIds) { 232 this.localeIds = parseLocaleIds(localeIds); 233 } 234 235 @Override init()236 public void init() throws BuildException { 237 checkBuild(!source.isEmpty(), "Source path not be empty"); 238 checkBuild(!target.isEmpty(), "Target path not be empty"); 239 } 240 } 241 242 @SuppressWarnings("unused") addConfiguredLocaleIds(LocaleIds localeIds)243 public void addConfiguredLocaleIds(LocaleIds localeIds) { 244 checkBuild(this.localeIds == null, "Cannot add more that one <localeIds> element"); 245 this.localeIds = localeIds; 246 } 247 248 @SuppressWarnings("unused") addConfiguredDirectory(Directory filter)249 public void addConfiguredDirectory(Directory filter) { 250 checkState(!perDirectoryIds.containsKey(filter.dir), 251 "directory %s specified twice", filter.dir); 252 ImmutableSet<String> ids = filter.localeIds.ids; 253 perDirectoryIds.putAll(filter.dir, ids); 254 255 // Check that any locale IDs marked to inherit the base language (instead of root) are 256 // listed in the set of generated locales. 257 inheritLanguageSubtag.putAll(filter.dir, filter.inheritLanguageSubtag); 258 if (!ids.containsAll(filter.inheritLanguageSubtag)) { 259 log(String.format( 260 "WARNING: Locale IDs listed in 'inheritLanguageSubtag' should also be listed " 261 + "in <localeIds> for that directory (%s): %s", 262 filter.dir, String.join(", ", Sets.difference(filter.inheritLanguageSubtag, ids)))); 263 perDirectoryIds.putAll(filter.dir, filter.inheritLanguageSubtag); 264 } 265 266 // Check that locales specified for forced aliases in this directory are also listed in 267 // the set of generated locales. 268 filter.forcedAliases.forEach(a -> config.addForcedAlias(filter.dir, a.source, a.target)); 269 Set<String> sourceIds = 270 filter.forcedAliases.stream().map(a -> a.source).collect(Collectors.toSet()); 271 if (!ids.containsAll(sourceIds)) { 272 Set<String> missingIds = Sets.difference(sourceIds, ids); 273 log(String.format( 274 "WARNING: Locale IDs listed as sources of a <forcedAlias> should also be listed " 275 + "in <localeIds> for that directory (%s): %s", 276 filter.dir, String.join(", ", missingIds))); 277 perDirectoryIds.putAll(filter.dir, missingIds); 278 } 279 Set<String> targetIds = 280 filter.forcedAliases.stream().map(a -> a.target).collect(Collectors.toSet()); 281 if (!ids.containsAll(targetIds)) { 282 Set<String> missingIds = Sets.difference(targetIds, ids); 283 log(String.format( 284 "WARNING: Locale IDs listed as targets of a <forcedAlias> should also be listed " 285 + "in <localeIds> for that directory (%s): %s", 286 filter.dir, String.join(", ", missingIds))); 287 perDirectoryIds.putAll(filter.dir, missingIds); 288 } 289 } 290 291 // Aliases on the outside are applied to all directories. 292 @SuppressWarnings("unused") addConfiguredForcedAlias(ForcedAlias alias)293 public void addConfiguredForcedAlias(ForcedAlias alias) { 294 for (IcuLocaleDir dir : IcuLocaleDir.values()) { 295 config.addForcedAlias(dir, alias.source, alias.target); 296 } 297 } 298 299 @SuppressWarnings("unused") addConfiguredAltPath(AltPath altPath)300 public void addConfiguredAltPath(AltPath altPath) { 301 // Don't convert to CldrPath here (it triggers a bunch of CLDR data loading for the DTDs). 302 // Wait until the "execute()" method since in future we expect to use the configured CLDR 303 // directory explicitly there. 304 altPaths.add(altPath); 305 } 306 307 @SuppressWarnings("unused") execute()308 public void execute() throws BuildException { 309 // Spin up CLDRConfig outside of other inner loops, to 310 // avoid static init problems seen in CLDR-14636 311 CLDRConfig.getInstance().getSupplementalDataInfo(); 312 313 checkBuild(localeIds != null, "<localeIds> must be specified"); 314 315 CldrDataSupplier src = CldrDataSupplier 316 .forCldrFilesIn(cldrPath) 317 .withDraftStatusAtLeast(minimumDraftStatus); 318 319 // We must do this wrapping of the data supplier _before_ creating the supplemental data 320 // instance since adding pseudo locales affects the set of available locales. 321 // TODO: Move some/all of this into the base converter and control it via the config. 322 if (!altPaths.isEmpty()) { 323 src = AlternateLocaleData.transform(src, getGlobalAltPaths(), getLocaleAltPaths()); 324 } 325 if (includePseudoLocales) { 326 src = PseudoLocales.addPseudoLocalesTo(src); 327 } 328 329 SupplementalData supplementalData = SupplementalData.create(src); 330 ImmutableSet<String> defaultTargetIds = 331 LocaleIdResolver.expandTargetIds(this.localeIds.ids, supplementalData); 332 for (IcuLocaleDir dir : IcuLocaleDir.values()) { 333 Iterable<String> ids = perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds); 334 config.addLocaleIds(dir, Iterables.filter(ids, idFilter::test)); 335 336 // We should only have locale IDs like "zh_Hant" here (language + script) and only 337 // those which would naturally inherit to "root" 338 inheritLanguageSubtag.get(dir).forEach(id -> { 339 checkArgument(id.matches("[a-z]{2}_[A-Z][a-z]{3}"), 340 "Invalid locale ID for inheritLanguageSubtag (expect '<lang>_<Script>'): ", id); 341 checkArgument(supplementalData.getParent(id).equals("root"), 342 "Invalid locale ID for inheritLanguageSubtag (parent must be 'root'): ", id); 343 config.addForcedParent(dir, id, id.substring(0, 2)); 344 }); 345 } 346 config.setMinimumDraftStatus(minimumDraftStatus); 347 LdmlConverter.convert(src, supplementalData, config.build()); 348 } 349 getGlobalAltPaths()350 private ImmutableMap<CldrPath, CldrPath> getGlobalAltPaths() { 351 // This fails if the same key appears more than once. 352 return altPaths.stream() 353 .filter(a -> a.localeIds.isEmpty()) 354 .collect(toImmutableMap( 355 a -> parseDistinguishingPath(a.target), 356 a -> parseDistinguishingPath(a.source))); 357 } 358 getLocaleAltPaths()359 private ImmutableTable<String, CldrPath, CldrPath> getLocaleAltPaths() { 360 return altPaths.stream() 361 .flatMap( 362 a -> a.localeIds.stream().map( 363 id -> immutableCell( 364 id, 365 parseDistinguishingPath(a.target), 366 parseDistinguishingPath(a.source)))) 367 // Weirdly there's no collector method to just collect cells. 368 .collect(toImmutableTable(Cell::getRowKey, Cell::getColumnKey, Cell::getValue)); 369 } 370 checkBuild(boolean condition, String message, Object... args)371 private static void checkBuild(boolean condition, String message, Object... args) { 372 if (!condition) { 373 throw new BuildException(String.format(message, args)); 374 } 375 } 376 parseLocaleIds(String localeIds)377 private static ImmutableSet<String> parseLocaleIds(String localeIds) { 378 // Need to filter out '//' style end-of-line comments first (replace with \n to avoid 379 // inadvertently joining two elements. 380 localeIds = localeIds.replaceAll("//[^\n]*\n", "\n"); 381 return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds)); 382 } 383 resolve(Class<T> enumClass, String name)384 private static <T extends Enum<T>> T resolve(Class<T> enumClass, String name) { 385 checkArgument(!name.isEmpty(), "enumeration name cannot be empty"); 386 checkArgument(VALID_ENUM_CHAR.matchesAllOf(name), 387 "invalid enumeration name '%s'; expected only ASCII letters or '_'", name); 388 CaseFormat format; 389 if (UPPER_UNDERSCORE.matchesAllOf(name)) { 390 format = CaseFormat.UPPER_UNDERSCORE; 391 } else if (LOWER_UNDERSCORE.matchesAllOf(name)) { 392 format = CaseFormat.LOWER_UNDERSCORE; 393 } else { 394 // Mixed case with '_' is not permitted. 395 checkArgument(!name.contains("_"), 396 "invalid enumeration name '%s'; mixed case with underscore not allowed: %s", name); 397 format = 398 Ascii.isLowerCase(name.charAt(0)) ? CaseFormat.LOWER_CAMEL : CaseFormat.UPPER_CAMEL; 399 } 400 try { 401 return Enum.valueOf(enumClass, format.to(CaseFormat.UPPER_UNDERSCORE, name)); 402 } catch (IllegalArgumentException e) { 403 String validNames = 404 Arrays.stream(enumClass.getEnumConstants()) 405 .map(Object::toString) 406 .collect(joining(", ")); 407 throw new IllegalArgumentException( 408 "invalid enumeration name " + name + "; expected one of; " + validNames); 409 } 410 } 411 } 412