1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 package org.unicode.icu.tool.cldrtoicu; 4 5 import static com.google.common.base.Preconditions.checkArgument; 6 import static com.google.common.base.Preconditions.checkNotNull; 7 import static com.google.common.collect.ImmutableList.toImmutableList; 8 import static java.nio.charset.StandardCharsets.UTF_8; 9 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; 10 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; 11 import static org.unicode.cldr.api.CldrDataType.BCP47; 12 import static org.unicode.cldr.api.CldrDataType.LDML; 13 import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL; 14 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.BRKITR; 15 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.COLL; 16 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.CURR; 17 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LANG; 18 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LOCALES; 19 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RBNF; 20 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.REGION; 21 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT; 22 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE; 23 24 import java.io.BufferedWriter; 25 import java.io.IOException; 26 import java.io.InputStream; 27 import java.io.InputStreamReader; 28 import java.io.PrintWriter; 29 import java.nio.file.Files; 30 import java.nio.file.Path; 31 import java.util.*; 32 import java.util.function.Predicate; 33 import java.util.stream.Collectors; 34 import java.util.stream.Stream; 35 36 import org.unicode.cldr.api.CldrData; 37 import org.unicode.cldr.api.CldrDataSupplier; 38 import org.unicode.cldr.api.CldrDataType; 39 import org.unicode.cldr.api.CldrPath; 40 import org.unicode.cldr.api.PathMatcher; 41 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir; 42 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuVersionInfo; 43 import org.unicode.icu.tool.cldrtoicu.localedistance.LocaleDistanceMapper; 44 import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper; 45 import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper; 46 import org.unicode.icu.tool.cldrtoicu.mapper.CollationMapper; 47 import org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapper; 48 import org.unicode.icu.tool.cldrtoicu.mapper.LocaleMapper; 49 import org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapper; 50 import org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapper; 51 import org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapper; 52 import org.unicode.icu.tool.cldrtoicu.mapper.SupplementalMapper; 53 import org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapper; 54 import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer; 55 56 import com.google.common.base.CharMatcher; 57 import com.google.common.collect.HashMultimap; 58 import com.google.common.collect.ImmutableList; 59 import com.google.common.collect.ImmutableListMultimap; 60 import com.google.common.collect.ImmutableMap; 61 import com.google.common.collect.ImmutableSet; 62 import com.google.common.collect.LinkedListMultimap; 63 import com.google.common.collect.ListMultimap; 64 import com.google.common.collect.Maps; 65 import com.google.common.collect.SetMultimap; 66 import com.google.common.collect.Sets; 67 import com.google.common.io.CharStreams; 68 69 /** 70 * The main converter tool for CLDR to ICU data. To run this tool, you need to supply a suitable 71 * {@link LdmlConverterConfig} instance. There is a simple {@code main()} method available in this 72 * class which can be invoked passing just the desired output directory and which relies on the 73 * presence of several system properties for the remainder of its parameters: 74 * <ul> 75 * <li>CLDR_DIR: The root of the CLDR release from which CLDR data is read. 76 * <li>ICU_DIR: The root of the ICU release from which additional "specials" XML data is read. 77 * <li>CLDR_DTD_CACHE: A temporary directory with the various DTDs cached (this is a legacy 78 * requirement from the underlying CLDR libraries and might go away one day). 79 * </ul> 80 */ 81 public final class LdmlConverter { 82 // TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath). 83 private static final Predicate<CldrPath> GENDER_LIST_PATHS = 84 supplementalMatcher("gender"); 85 private static final Predicate<CldrPath> LIKELY_SUBTAGS_PATHS = 86 supplementalMatcher("likelySubtags"); 87 private static final Predicate<CldrPath> METAZONE_PATHS = 88 supplementalMatcher("metaZones", "primaryZones"); 89 private static final Predicate<CldrPath> METADATA_PATHS = 90 supplementalMatcher("metadata"); 91 private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS = 92 supplementalMatcher( 93 "calendarData", 94 "calendarPreferenceData", 95 "codeMappings", 96 "codeMappingsCurrency", 97 "idValidity", 98 "languageData", 99 "languageMatching", 100 "measurementData", 101 "parentLocales", 102 "subdivisionContainment", 103 "territoryContainment", 104 "territoryInfo", 105 "timeData", 106 "weekData", 107 "weekOfPreference"); 108 private static final Predicate<CldrPath> CURRENCY_DATA_PATHS = 109 supplementalMatcher("currencyData"); 110 private static final Predicate<CldrPath> UNITS_DATA_PATHS = 111 supplementalMatcher( 112 "convertUnits", 113 "unitConstants", 114 "unitQuantities", 115 "unitPreferenceData"); 116 private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS = 117 supplementalMatcher("numberingSystems"); 118 private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS = 119 supplementalMatcher("windowsZones"); 120 supplementalMatcher(String... spec)121 private static Predicate<CldrPath> supplementalMatcher(String... spec) { 122 checkArgument(spec.length > 0, "must supply at least one matcher spec"); 123 if (spec.length == 1) { 124 return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf; 125 } 126 return 127 Arrays.stream(spec) 128 .map(s -> PathMatcher.of("//supplementalData/" + s)) 129 .map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf)) 130 .reduce(p -> false, Predicate::or); 131 } 132 133 private static RbPath RB_PARENT = RbPath.of("%%Parent"); 134 // The quotes below are only so we achieve parity with the manually written alias files. 135 // TODO: Remove unnecessary quotes once the migration to this code is complete. 136 private static RbPath RB_ALIAS = RbPath.of("\"%%ALIAS\""); 137 // Special path for adding to empty files which only exist to complete the parent chain. 138 // TODO: Confirm that this has no meaningful effect and unify "empty" file contents. 139 private static RbPath RB_EMPTY_ALIAS = RbPath.of("___"); 140 141 /** 142 * Output types defining specific subsets of the ICU data which can be converted separately. 143 * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to 144 * hide what are essentially implementation specific data splits. 145 */ 146 public enum OutputType { 147 LOCALES(LDML), 148 BRKITR(LDML), 149 COLL(LDML), 150 RBNF(LDML), 151 DAY_PERIODS(SUPPLEMENTAL), 152 GENDER_LIST(SUPPLEMENTAL), 153 LIKELY_SUBTAGS(SUPPLEMENTAL), 154 SUPPLEMENTAL_DATA(SUPPLEMENTAL), 155 UNITS(SUPPLEMENTAL), 156 CURRENCY_DATA(SUPPLEMENTAL), 157 METADATA(SUPPLEMENTAL), 158 META_ZONES(SUPPLEMENTAL), 159 NUMBERING_SYSTEMS(SUPPLEMENTAL), 160 PLURALS(SUPPLEMENTAL), 161 PLURAL_RANGES(SUPPLEMENTAL), 162 WINDOWS_ZONES(SUPPLEMENTAL), 163 TRANSFORMS(SUPPLEMENTAL), 164 LOCALE_DISTANCE(SUPPLEMENTAL), 165 VERSION(SUPPLEMENTAL), 166 KEY_TYPE_DATA(BCP47); 167 168 public static final ImmutableSet<OutputType> ALL = ImmutableSet.copyOf(OutputType.values()); 169 170 private final CldrDataType type; 171 OutputType(CldrDataType type)172 OutputType(CldrDataType type) { 173 this.type = checkNotNull(type); 174 } 175 getCldrType()176 CldrDataType getCldrType() { 177 return type; 178 } 179 } 180 181 // Map to convert the rather arbitrarily defined "output types" to the directories into which 182 // the data is written. This is only for "LDML" types since other mappers don't need to split 183 // data into multiple directories. 184 private static final ImmutableListMultimap<OutputType, IcuLocaleDir> TYPE_TO_DIR = 185 ImmutableListMultimap.<OutputType, IcuLocaleDir>builder() 186 .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE) 187 .putAll(OutputType.BRKITR, BRKITR) 188 .putAll(OutputType.COLL, COLL) 189 .putAll(OutputType.RBNF, RBNF) 190 .build(); 191 192 /** Converts CLDR data according to the given configuration. */ convert( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)193 public static void convert( 194 CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) { 195 new LdmlConverter(src, supplementalData, config).convertAll(); 196 } 197 198 // The supplier for all data to be converted. 199 private final CldrDataSupplier src; 200 // Supplemental data available to mappers if needed. 201 private final SupplementalData supplementalData; 202 // The configuration controlling conversion behaviour. 203 private final LdmlConverterConfig config; 204 // The set of expanded target locale IDs. 205 // TODO: Make available IDs include specials files (or fail if specials are not available). 206 private final ImmutableSet<String> availableIds; 207 // Transformer for locale data. 208 private final PathValueTransformer localeTransformer; 209 // Transformer for supplemental data. 210 private final PathValueTransformer supplementalTransformer; 211 // Header string to go into every ICU data and transliteration rule file (comment prefixes 212 // are not present and must be added by the code writing the file). 213 private final ImmutableList<String> fileHeader; 214 LdmlConverter( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)215 private LdmlConverter( 216 CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) { 217 this.src = checkNotNull(src); 218 this.supplementalData = checkNotNull(supplementalData); 219 this.config = checkNotNull(config); 220 this.availableIds = ImmutableSet.copyOf( 221 Sets.intersection(supplementalData.getAvailableLocaleIds(), config.getAllLocaleIds())); 222 // Load the remaining path value transformers. 223 this.supplementalTransformer = 224 RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_supplemental.txt"), 225 IcuFunctions.ALGORITHM_FN, 226 IcuFunctions.DATE_FN, 227 IcuFunctions.DAY_NUMBER_FN, 228 IcuFunctions.EXP_FN, 229 IcuFunctions.YMD_FN); 230 this.localeTransformer = 231 RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_locale.txt"), 232 IcuFunctions.CONTEXT_TRANSFORM_INDEX_FN); 233 this.fileHeader = readLinesFromResource("/ldml2icu_header.txt"); 234 } 235 convertAll()236 private void convertAll() { 237 processLdml(); 238 processSupplemental(); 239 if (config.emitReport()) { 240 System.out.println("Supplemental Data Transformer=" + supplementalTransformer); 241 System.out.println("Locale Data Transformer=" + localeTransformer); 242 } 243 } 244 readLinesFromResource(String name)245 private static ImmutableList<String> readLinesFromResource(String name) { 246 try (InputStream in = LdmlConverter.class.getResourceAsStream(name)) { 247 return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8))); 248 } catch (IOException e) { 249 throw new RuntimeException("cannot read resource: " + name, e); 250 } 251 } 252 loadSpecialsData(String localeId)253 private Optional<CldrData> loadSpecialsData(String localeId) { 254 String expected = localeId + ".xml"; 255 try (Stream<Path> files = Files.walk(config.getSpecialsDir())) { 256 Set<Path> xmlFiles = files 257 .filter(Files::isRegularFile) 258 .filter(f -> f.getFileName().toString().equals(expected)) 259 .collect(Collectors.toSet()); 260 return !xmlFiles.isEmpty() 261 ? Optional.of( 262 CldrDataSupplier.forCldrFiles(LDML, config.getMinimumDraftStatus(), xmlFiles)) 263 : Optional.empty(); 264 } catch (IOException e) { 265 throw new RuntimeException( 266 "error processing specials directory: " + config.getSpecialsDir(), e); 267 } 268 } 269 processLdml()270 private void processLdml() { 271 ImmutableList<IcuLocaleDir> splitDirs = 272 config.getOutputTypes().stream() 273 .filter(t -> t.getCldrType() == LDML) 274 .flatMap(t -> TYPE_TO_DIR.get(t).stream()) 275 .collect(toImmutableList()); 276 if (splitDirs.isEmpty()) { 277 return; 278 } 279 280 String cldrVersion = config.getVersionInfo().getCldrVersion(); 281 282 Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>(); 283 splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion))); 284 285 SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create(); 286 Path baseDir = config.getOutputDir(); 287 288 for (String id : config.getAllLocaleIds()) { 289 // Skip "target" IDs that are aliases (they are handled later). 290 if (!availableIds.contains(id)) { 291 continue; 292 } 293 // TODO: Remove the following skip when ICU-20997 is fixed 294 if (id.contains("VALENCIA")) { 295 System.out.println("(skipping " + id + " until ICU-20997 is fixed)"); 296 continue; 297 } 298 299 IcuData icuData = new IcuData(id, true); 300 301 Optional<CldrData> specials = loadSpecialsData(id); 302 CldrData unresolved = src.getDataForLocale(id, UNRESOLVED); 303 304 BreakIteratorMapper.process(icuData, unresolved, specials); 305 CollationMapper.process(icuData, unresolved, specials, cldrVersion); 306 RbnfMapper.process(icuData, unresolved, specials); 307 308 CldrData resolved = src.getDataForLocale(id, RESOLVED); 309 Optional<String> defaultCalendar = supplementalData.getDefaultCalendar(id); 310 LocaleMapper.process( 311 icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar); 312 313 ListMultimap<IcuLocaleDir, RbPath> splitPaths = LinkedListMultimap.create(); 314 for (RbPath p : icuData.getPaths()) { 315 String rootName = getBaseSegmentName(p.getSegment(0)); 316 splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p); 317 } 318 319 Optional<String> parent = supplementalData.getExplicitParentLocaleOf(id); 320 // We always write base languages (even if empty). 321 boolean isBaseLanguage = !id.contains("_"); 322 // Run through all directories (not just the keySet() of the split path map) since we 323 // sometimes write empty files. 324 for (IcuLocaleDir dir : splitDirs) { 325 Set<String> targetIds = config.getTargetLocaleIds(dir); 326 if (!targetIds.contains(id)) { 327 if (!splitPaths.get(dir).isEmpty()) { 328 System.out.format( 329 "target IDs for %s does not contain %s, but it has data: %s\n", 330 dir, id, splitPaths.get(dir)); 331 } 332 continue; 333 } 334 335 Path outDir = baseDir.resolve(dir.getOutputDir()); 336 IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback()); 337 338 // The split data can still be empty for this directory, but that's expected (it 339 // might only be written because it has an explicit parent added below). 340 splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p))); 341 342 // If we add an explicit parent locale, it forces the data to be written. This is 343 // where we check for forced overrides of the parent relationship (which is a per 344 // directory thing). 345 getIcuParent(id, parent, dir).ifPresent(p -> { 346 splitData.add(RB_PARENT, p); 347 graphMetadata.get(dir).addParent(id, p); 348 }); 349 350 if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) { 351 if (id.equals("root")) { 352 splitData.setVersion(cldrVersion); 353 } 354 write(splitData, outDir, false); 355 writtenLocaleIds.put(dir, id); 356 } 357 } 358 } 359 360 for (IcuLocaleDir dir : splitDirs) { 361 Path outDir = baseDir.resolve(dir.getOutputDir()); 362 Set<String> targetIds = config.getTargetLocaleIds(dir); 363 DependencyGraph depGraph = graphMetadata.get(dir); 364 365 // TODO: Maybe calculate alias map directly into the dependency graph? 366 Map<String, String> aliasMap = getAliasMap(targetIds, dir); 367 aliasMap.forEach((s, t) -> { 368 depGraph.addAlias(s, t); 369 writeAliasFile(s, t, outDir); 370 // It's only important to record which alias files are written because of forced 371 // aliases, but since it's harmless otherwise, we just do it unconditionally. 372 // Normal alias files don't affect the empty file calculation, but forced ones can. 373 writtenLocaleIds.put(dir, s); 374 }); 375 376 calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values()) 377 .forEach(id -> writeEmptyFile(id, outDir, aliasMap.values())); 378 379 writeDependencyGraph(outDir, depGraph); 380 } 381 } 382 383 384 private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%"); 385 386 // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type 387 // annotations (e.g. "languages:intvector"). We strip these when considering the element name. getBaseSegmentName(String segment)388 private static String getBaseSegmentName(String segment) { 389 int idx = PATH_MODIFIER.indexIn(segment); 390 return idx == -1 ? segment : segment.substring(0, idx); 391 } 392 393 /* 394 * There are four reasons for treating a locale ID as an alias. 395 * 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS"). 396 * 2: It has no CLDR data but is missing a script subtag. 397 * 3: It is one of the special "phantom" alias which cannot be represented normally 398 * and must be manually mapped (e.g. legacy locale IDs which don't even parse). 399 * 4: It is a "super special" forced alias, which might replace existing aliases in 400 * some output directories. 401 */ getAliasMap(Set<String> localeIds, IcuLocaleDir dir)402 private Map<String, String> getAliasMap(Set<String> localeIds, IcuLocaleDir dir) { 403 // Even forced aliases only apply if they are in the set of locale IDs for the directory. 404 Map<String, String> forcedAliases = 405 Maps.filterKeys(config.getForcedAliases(dir), localeIds::contains); 406 407 Map<String, String> aliasMap = new LinkedHashMap<>(); 408 for (String id : localeIds) { 409 if (forcedAliases.containsKey(id)) { 410 // Forced aliases will be added later and don't need to be processed here. This 411 // is especially necessary if the ID is not structurally valid (e.g. "no_NO_NY") 412 // since that cannot be processed by the code below. 413 continue; 414 } 415 String canonicalId = supplementalData.replaceDeprecatedTags(id); 416 if (!canonicalId.equals(id)) { 417 // If the canonical form of an ID differs from the requested ID, the this is an 418 // alias, and just needs to point to the canonical ID. 419 aliasMap.put(id, canonicalId); 420 continue; 421 } 422 if (availableIds.contains(id)) { 423 // If it's canonical and supported, it's not an alias. 424 continue; 425 } 426 // If the requested locale is not supported, maximize it and alias to that. 427 String maximizedId = supplementalData.maximize(id) 428 .orElseThrow(() -> new IllegalArgumentException("unsupported locale ID: " + id)); 429 // We can't alias to ourselves and we shouldn't be here is the ID was already maximal. 430 checkArgument(!maximizedId.equals(id), "unsupported maximized locale ID: %s", id); 431 aliasMap.put(id, maximizedId); 432 } 433 // Important that we overwrite entries which might already exist here, since we might have 434 // already calculated a "natural" alias for something that we want to force (and we should 435 // replace the existing target, since that affects how we determine empty files later). 436 aliasMap.putAll(forcedAliases); 437 return aliasMap; 438 } 439 440 /* 441 * Helper to determine the correct parent ID to be written into the ICU data file. The rules 442 * are: 443 * 1: If no forced parent exists (common) write the explicit parent (if that exists) 444 * 2: If a forced parent exists, but the forced value is what you would get by just truncating 445 * the current locale ID, write nothing (ICU libraries truncate when no parent is set). 446 * 3: Write the forced parent (this is an exceptional case, and may not even occur in data). 447 */ getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir)448 private Optional<String> getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir) { 449 String forcedParentId = config.getForcedParents(dir).get(id); 450 if (forcedParentId == null) { 451 return parent; 452 } 453 return id.contains("_") && forcedParentId.regionMatches(0, id, 0, id.lastIndexOf('_')) 454 ? Optional.empty() : Optional.of(forcedParentId); 455 } 456 processSupplemental()457 private void processSupplemental() { 458 for (OutputType type : config.getOutputTypes()) { 459 if (type.getCldrType() == LDML) { 460 continue; 461 } 462 switch (type) { 463 case DAY_PERIODS: 464 write(DayPeriodsMapper.process(src), "misc"); 465 break; 466 467 case GENDER_LIST: 468 processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false); 469 break; 470 471 case LIKELY_SUBTAGS: 472 processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false); 473 break; 474 475 case SUPPLEMENTAL_DATA: 476 processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true); 477 break; 478 479 case UNITS: 480 processSupplemental("units", UNITS_DATA_PATHS, "misc", true); 481 break; 482 483 case CURRENCY_DATA: 484 processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", false); 485 break; 486 487 case METADATA: 488 processSupplemental("metadata", METADATA_PATHS, "misc", false); 489 break; 490 491 case META_ZONES: 492 processSupplemental("metaZones", METAZONE_PATHS, "misc", false); 493 break; 494 495 case NUMBERING_SYSTEMS: 496 processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false); 497 break; 498 499 case PLURALS: 500 write(PluralsMapper.process(src), "misc"); 501 break; 502 503 case PLURAL_RANGES: 504 write(PluralRangesMapper.process(src), "misc"); 505 break; 506 507 case LOCALE_DISTANCE: 508 write(LocaleDistanceMapper.process(src), "misc"); 509 break; 510 511 case WINDOWS_ZONES: 512 processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false); 513 break; 514 515 case TRANSFORMS: 516 Path transformDir = createDirectory(config.getOutputDir().resolve("translit")); 517 write(TransformsMapper.process(src, transformDir, fileHeader), transformDir, false); 518 break; 519 520 case VERSION: 521 writeIcuVersionInfo(); 522 break; 523 524 case KEY_TYPE_DATA: 525 Bcp47Mapper.process(src).forEach(d -> write(d, "misc")); 526 break; 527 528 default: 529 throw new AssertionError("Unsupported supplemental type: " + type); 530 } 531 } 532 } 533 534 private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion"); 535 processSupplemental( String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion)536 private void processSupplemental( 537 String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) { 538 IcuData icuData = 539 SupplementalMapper.process(src, supplementalTransformer, label, paths); 540 // A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the 541 // supplemental data XML files. 542 if (addCldrVersion) { 543 // Not the same path as used by "setVersion()" 544 icuData.add(RB_CLDR_VERSION, config.getVersionInfo().getCldrVersion()); 545 } 546 write(icuData, dir); 547 } 548 writeAliasFile(String srcId, String destId, Path dir)549 private void writeAliasFile(String srcId, String destId, Path dir) { 550 IcuData icuData = new IcuData(srcId, true); 551 icuData.add(RB_ALIAS, destId); 552 // Allow overwrite for aliases since some are "forced" and overwrite existing targets. 553 // TODO: Maybe tighten this up so only forced aliases for existing targets are overwritten. 554 write(icuData, dir, true); 555 } 556 writeEmptyFile(String id, Path dir, Collection<String> aliasTargets)557 private void writeEmptyFile(String id, Path dir, Collection<String> aliasTargets) { 558 IcuData icuData = new IcuData(id, true); 559 // TODO: Document the reason for this (i.e. why does it matter what goes into empty files?) 560 if (aliasTargets.contains(id)) { 561 icuData.setFileComment("generated alias target"); 562 icuData.add(RB_EMPTY_ALIAS, ""); 563 } else { 564 // These empty files only exist because the target of an alias has a parent locale 565 // which is itself not in the set of written ICU files. An "indirect alias target". 566 // No need to add data: Just write a resource bundle with an empty top-level table. 567 } 568 write(icuData, dir, false); 569 } 570 writeIcuVersionInfo()571 private void writeIcuVersionInfo() { 572 IcuVersionInfo versionInfo = config.getVersionInfo(); 573 IcuData versionData = new IcuData("icuver", false); 574 versionData.add(RbPath.of("ICUVersion"), versionInfo.getIcuVersion()); 575 versionData.add(RbPath.of("DataVersion"), versionInfo.getIcuDataVersion()); 576 versionData.add(RbPath.of("CLDRVersion"), versionInfo.getCldrVersion()); 577 // Write file via non-helper methods since we need to include a legacy copyright. 578 Path miscDir = config.getOutputDir().resolve("misc"); 579 createDirectory(miscDir); 580 ImmutableList<String> versionHeader = ImmutableList.<String>builder() 581 .addAll(fileHeader) 582 .add( 583 "***************************************************************************", 584 "*", 585 "* Copyright (C) 2010-2016 International Business Machines", 586 "* Corporation and others. All Rights Reserved.", 587 "*", 588 "***************************************************************************") 589 .build(); 590 IcuTextWriter.writeToFile(versionData, miscDir, versionHeader, false); 591 } 592 593 // Commonest case for writing data files in "normal" directories. write(IcuData icuData, String dir)594 private void write(IcuData icuData, String dir) { 595 write(icuData, config.getOutputDir().resolve(dir), false); 596 } 597 write(IcuData icuData, Path dir, boolean allowOverwrite)598 private void write(IcuData icuData, Path dir, boolean allowOverwrite) { 599 createDirectory(dir); 600 IcuTextWriter.writeToFile(icuData, dir, fileHeader, allowOverwrite); 601 } 602 createDirectory(Path dir)603 private Path createDirectory(Path dir) { 604 try { 605 Files.createDirectories(dir); 606 } catch (IOException e) { 607 throw new RuntimeException("cannot create directory: " + dir, e); 608 } 609 return dir; 610 } 611 writeDependencyGraph(Path dir, DependencyGraph depGraph)612 private void writeDependencyGraph(Path dir, DependencyGraph depGraph) { 613 createDirectory(dir); 614 try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8); 615 PrintWriter out = new PrintWriter(w)) { 616 depGraph.writeJsonTo(out, fileHeader); 617 out.flush(); 618 } catch (IOException e) { 619 throw new RuntimeException("cannot write dependency graph file: " + dir, e); 620 } 621 } 622 623 // The set of IDs to process is: 624 // * any file that was written 625 // * any alias target (not written) 626 // 627 // From which we generate the complete "closure" under the "getParent()" function. This set 628 // contains all file (written or not) which need to exist to complete the locale hierarchy. 629 // 630 // Then we remove all the written files to just leave the ones that need to be generated. 631 // This is a simple and robust approach that handles things like "gaps" in non-aliased 632 // locale IDs, where an intermediate parent is not present. calculateEmptyFiles( Set<String> writtenIds, Collection<String> aliasTargetIds)633 private ImmutableSet<String> calculateEmptyFiles( 634 Set<String> writtenIds, Collection<String> aliasTargetIds) { 635 636 Set<String> seedIds = new HashSet<>(writtenIds); 637 seedIds.addAll(aliasTargetIds); 638 // Be nice and sort the output (makes easier debugging). 639 Set<String> allIds = new TreeSet<>(); 640 for (String id : seedIds) { 641 while (!id.equals("root") && !allIds.contains(id)) { 642 allIds.add(id); 643 id = supplementalData.getParent(id); 644 } 645 } 646 return ImmutableSet.copyOf(Sets.difference(allIds, writtenIds)); 647 } 648 649 private static final ImmutableMap<String, IcuLocaleDir> LOCALE_SPLIT_INFO = 650 ImmutableMap.<String, IcuLocaleDir>builder() 651 // BRKITR 652 .put("boundaries", BRKITR) 653 .put("dictionaries", BRKITR) 654 .put("exceptions", BRKITR) 655 // COLL 656 .put("collations", COLL) 657 .put("depends", COLL) 658 .put("UCARules", COLL) 659 // CURR 660 .put("Currencies", CURR) 661 .put("CurrencyPlurals", CURR) 662 .put("CurrencyUnitPatterns", CURR) 663 .put("currencySpacing", CURR) 664 // LANG 665 .put("Keys", LANG) 666 .put("Languages", LANG) 667 .put("Scripts", LANG) 668 .put("Types", LANG) 669 .put("Variants", LANG) 670 .put("characterLabelPattern", LANG) 671 .put("codePatterns", LANG) 672 .put("localeDisplayPattern", LANG) 673 // RBNF 674 .put("RBNFRules", RBNF) 675 // REGION 676 .put("Countries", REGION) 677 // UNIT 678 .put("durationUnits", UNIT) 679 .put("units", UNIT) 680 .put("unitsShort", UNIT) 681 .put("unitsNarrow", UNIT) 682 // ZONE 683 .put("zoneStrings", ZONE) 684 .build(); 685 } 686