// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import static org.unicode.cldr.api.CldrDataType.BCP47;
import static org.unicode.cldr.api.CldrDataType.LDML;
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.BRKITR;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.COLL;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.CURR;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LANG;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LOCALES;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RBNF;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.REGION;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuVersionInfo;
import org.unicode.icu.tool.cldrtoicu.localedistance.LocaleDistanceMapper;
import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper;
import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper;
import org.unicode.icu.tool.cldrtoicu.mapper.CollationMapper;
import org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapper;
import org.unicode.icu.tool.cldrtoicu.mapper.LocaleMapper;
import org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapper;
import org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapper;
import org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapper;
import org.unicode.icu.tool.cldrtoicu.mapper.SupplementalMapper;
import org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapper;
import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer;

import com.google.common.base.CharMatcher;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Maps;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Sets;
import com.google.common.io.CharStreams;

/**
 * The main converter tool for CLDR to ICU data. To run this tool, you need to supply a suitable
 * {@link LdmlConverterConfig} instance. There is a simple {@code main()} method available in this
 * class which can be invoked passing just the desired output directory and which relies on the
 * presence of several system properties for the remainder of its parameters:
 * <ul>
 * <li>CLDR_DIR: The root of the CLDR release from which CLDR data is read.
 * <li>ICU_DIR: The root of the ICU release from which additional "specials" XML data is read.
 * <li>CLDR_DTD_CACHE: A temporary directory with the various DTDs cached (this is a legacy
 *     requirement from the underlying CLDR libraries and might go away one day).
 * </ul>
 *
 * <p>NOTE(review): no {@code main()} method is defined in this file; the only public entry point
 * visible here is the static {@code convert()} method. The paragraph above may therefore be
 * stale and should be confirmed against whatever code actually launches the conversion.
 */
public final class LdmlConverter {
    // TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath).

    // Path filters handed to the supplemental mapper for the correspondingly named output types.
    // Each one is built by supplementalMatcher() from the names of one or more elements directly
    // below "//supplementalData".
    private static final Predicate<CldrPath> GENDER_LIST_PATHS =
        supplementalMatcher("gender");
    private static final Predicate<CldrPath> LIKELY_SUBTAGS_PATHS =
        supplementalMatcher("likelySubtags");
    private static final Predicate<CldrPath> METAZONE_PATHS =
        supplementalMatcher("metaZones", "primaryZones");
    private static final Predicate<CldrPath> METADATA_PATHS =
        supplementalMatcher("metadata");
    private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS =
        supplementalMatcher(
            "calendarData",
            "calendarPreferenceData",
            "codeMappings",
            "codeMappingsCurrency",
            "idValidity",
            "languageData",
            "languageMatching",
            "measurementData",
            "parentLocales",
            "subdivisionContainment",
            "territoryContainment",
            "territoryInfo",
            "timeData",
            "weekData",
            "weekOfPreference");
    // Currency data is matched separately because it is written to a different output directory
    // ("curr") than the other supplemental data (see processSupplemental()).
    private static final Predicate<CldrPath> CURRENCY_DATA_PATHS =
        supplementalMatcher("currencyData");
    private static final Predicate<CldrPath> UNITS_DATA_PATHS =
        supplementalMatcher(
            "convertUnits",
            "unitConstants",
            "unitQuantities",
            "unitPreferenceData");
    private static final Predicate<CldrPath> GRAMMATICAL_FEATURES_PATHS =
        supplementalMatcher("grammaticalData");
118 private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS = 119 supplementalMatcher("numberingSystems"); 120 private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS = 121 supplementalMatcher("windowsZones"); 122 supplementalMatcher(String... spec)123 private static Predicate<CldrPath> supplementalMatcher(String... spec) { 124 checkArgument(spec.length > 0, "must supply at least one matcher spec"); 125 if (spec.length == 1) { 126 return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf; 127 } 128 return 129 Arrays.stream(spec) 130 .map(s -> PathMatcher.of("//supplementalData/" + s)) 131 .map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf)) 132 .reduce(p -> false, Predicate::or); 133 } 134 135 private static RbPath RB_PARENT = RbPath.of("%%Parent"); 136 // The quotes below are only so we achieve parity with the manually written alias files. 137 // TODO: Remove unnecessary quotes once the migration to this code is complete. 138 private static RbPath RB_ALIAS = RbPath.of("\"%%ALIAS\""); 139 // Special path for adding to empty files which only exist to complete the parent chain. 140 // TODO: Confirm that this has no meaningful effect and unify "empty" file contents. 141 private static RbPath RB_EMPTY_ALIAS = RbPath.of("___"); 142 143 /** 144 * Output types defining specific subsets of the ICU data which can be converted separately. 145 * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to 146 * hide what are essentially implementation specific data splits. 
147 */ 148 public enum OutputType { 149 LOCALES(LDML), 150 BRKITR(LDML), 151 COLL(LDML), 152 RBNF(LDML), 153 DAY_PERIODS(SUPPLEMENTAL), 154 GENDER_LIST(SUPPLEMENTAL), 155 LIKELY_SUBTAGS(SUPPLEMENTAL), 156 SUPPLEMENTAL_DATA(SUPPLEMENTAL), 157 UNITS(SUPPLEMENTAL), 158 CURRENCY_DATA(SUPPLEMENTAL), 159 GRAMMATICAL_FEATURES(SUPPLEMENTAL), 160 METADATA(SUPPLEMENTAL), 161 META_ZONES(SUPPLEMENTAL), 162 NUMBERING_SYSTEMS(SUPPLEMENTAL), 163 PLURALS(SUPPLEMENTAL), 164 PLURAL_RANGES(SUPPLEMENTAL), 165 WINDOWS_ZONES(SUPPLEMENTAL), 166 TRANSFORMS(SUPPLEMENTAL), 167 LOCALE_DISTANCE(SUPPLEMENTAL), 168 VERSION(SUPPLEMENTAL), 169 KEY_TYPE_DATA(BCP47); 170 171 public static final ImmutableSet<OutputType> ALL = ImmutableSet.copyOf(OutputType.values()); 172 173 private final CldrDataType type; 174 OutputType(CldrDataType type)175 OutputType(CldrDataType type) { 176 this.type = checkNotNull(type); 177 } 178 getCldrType()179 CldrDataType getCldrType() { 180 return type; 181 } 182 } 183 184 // Map to convert the rather arbitrarily defined "output types" to the directories into which 185 // the data is written. This is only for "LDML" types since other mappers don't need to split 186 // data into multiple directories. 187 private static final ImmutableListMultimap<OutputType, IcuLocaleDir> TYPE_TO_DIR = 188 ImmutableListMultimap.<OutputType, IcuLocaleDir>builder() 189 .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE) 190 .putAll(OutputType.BRKITR, BRKITR) 191 .putAll(OutputType.COLL, COLL) 192 .putAll(OutputType.RBNF, RBNF) 193 .build(); 194 195 /** Converts CLDR data according to the given configuration. */ convert( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)196 public static void convert( 197 CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) { 198 new LdmlConverter(src, supplementalData, config).convertAll(); 199 } 200 201 // The supplier for all data to be converted. 
202 private final CldrDataSupplier src; 203 // Supplemental data available to mappers if needed. 204 private final SupplementalData supplementalData; 205 // The configuration controlling conversion behaviour. 206 private final LdmlConverterConfig config; 207 // The set of expanded target locale IDs. 208 // TODO: Make available IDs include specials files (or fail if specials are not available). 209 private final ImmutableSet<String> availableIds; 210 // Transformer for locale data. 211 private final PathValueTransformer localeTransformer; 212 // Transformer for supplemental data. 213 private final PathValueTransformer supplementalTransformer; 214 // Header string to go into every ICU data and transliteration rule file (comment prefixes 215 // are not present and must be added by the code writing the file). 216 private final ImmutableList<String> fileHeader; 217 LdmlConverter( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)218 private LdmlConverter( 219 CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) { 220 this.src = checkNotNull(src); 221 this.supplementalData = checkNotNull(supplementalData); 222 this.config = checkNotNull(config); 223 this.availableIds = ImmutableSet.copyOf( 224 Sets.intersection(supplementalData.getAvailableLocaleIds(), config.getAllLocaleIds())); 225 // Load the remaining path value transformers. 
226 this.supplementalTransformer = 227 RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_supplemental.txt"), 228 IcuFunctions.ALGORITHM_FN, 229 IcuFunctions.DATE_FN, 230 IcuFunctions.DAY_NUMBER_FN, 231 IcuFunctions.EXP_FN, 232 IcuFunctions.YMD_FN); 233 this.localeTransformer = 234 RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_locale.txt"), 235 IcuFunctions.CONTEXT_TRANSFORM_INDEX_FN); 236 this.fileHeader = readLinesFromResource("/ldml2icu_header.txt"); 237 } 238 convertAll()239 private void convertAll() { 240 processLdml(); 241 processSupplemental(); 242 if (config.emitReport()) { 243 System.out.println("Supplemental Data Transformer=" + supplementalTransformer); 244 System.out.println("Locale Data Transformer=" + localeTransformer); 245 } 246 } 247 readLinesFromResource(String name)248 private static ImmutableList<String> readLinesFromResource(String name) { 249 try (InputStream in = LdmlConverter.class.getResourceAsStream(name)) { 250 return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8))); 251 } catch (IOException e) { 252 throw new RuntimeException("cannot read resource: " + name, e); 253 } 254 } 255 loadSpecialsData(String localeId)256 private Optional<CldrData> loadSpecialsData(String localeId) { 257 String expected = localeId + ".xml"; 258 try (Stream<Path> files = Files.walk(config.getSpecialsDir())) { 259 Set<Path> xmlFiles = files 260 .filter(Files::isRegularFile) 261 .filter(f -> f.getFileName().toString().equals(expected)) 262 .collect(Collectors.toSet()); 263 return !xmlFiles.isEmpty() 264 ? 
Optional.of( 265 CldrDataSupplier.forCldrFiles(LDML, config.getMinimumDraftStatus(), xmlFiles)) 266 : Optional.empty(); 267 } catch (IOException e) { 268 throw new RuntimeException( 269 "error processing specials directory: " + config.getSpecialsDir(), e); 270 } 271 } 272 processLdml()273 private void processLdml() { 274 ImmutableList<IcuLocaleDir> splitDirs = 275 config.getOutputTypes().stream() 276 .filter(t -> t.getCldrType() == LDML) 277 .flatMap(t -> TYPE_TO_DIR.get(t).stream()) 278 .collect(toImmutableList()); 279 if (splitDirs.isEmpty()) { 280 return; 281 } 282 283 String cldrVersion = config.getVersionInfo().getCldrVersion(); 284 285 Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>(); 286 splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion))); 287 288 SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create(); 289 Path baseDir = config.getOutputDir(); 290 291 System.out.println("processing standard ldml files"); 292 for (String id : config.getAllLocaleIds()) { 293 // Skip "target" IDs that are aliases (they are handled later). 
294 if (!availableIds.contains(id)) { 295 continue; 296 } 297 // TODO: Remove the following skip when ICU-20997 is fixed 298 if (id.contains("VALENCIA") || id.contains("TARASK")) { 299 System.out.println("(skipping " + id + " until ICU-20997 is fixed)"); 300 continue; 301 } 302 303 IcuData icuData = new IcuData(id, true); 304 305 Optional<CldrData> specials = loadSpecialsData(id); 306 CldrData unresolved = src.getDataForLocale(id, UNRESOLVED); 307 308 BreakIteratorMapper.process(icuData, unresolved, specials); 309 CollationMapper.process(icuData, unresolved, specials, cldrVersion); 310 RbnfMapper.process(icuData, unresolved, specials); 311 312 CldrData resolved = src.getDataForLocale(id, RESOLVED); 313 Optional<String> defaultCalendar = supplementalData.getDefaultCalendar(id); 314 LocaleMapper.process( 315 icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar); 316 317 ListMultimap<IcuLocaleDir, RbPath> splitPaths = LinkedListMultimap.create(); 318 for (RbPath p : icuData.getPaths()) { 319 String rootName = getBaseSegmentName(p.getSegment(0)); 320 splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p); 321 } 322 323 Optional<String> parent = supplementalData.getExplicitParentLocaleOf(id); 324 // We always write base languages (even if empty). 325 boolean isBaseLanguage = !id.contains("_"); 326 // Run through all directories (not just the keySet() of the split path map) since we 327 // sometimes write empty files. 
328 for (IcuLocaleDir dir : splitDirs) { 329 Set<String> targetIds = config.getTargetLocaleIds(dir); 330 if (!targetIds.contains(id)) { 331 if (!splitPaths.get(dir).isEmpty()) { 332 System.out.format( 333 "target IDs for %s does not contain %s, but it has data: %s\n", 334 dir, id, splitPaths.get(dir)); 335 } 336 continue; 337 } 338 339 Path outDir = baseDir.resolve(dir.getOutputDir()); 340 IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback()); 341 342 // The split data can still be empty for this directory, but that's expected (it 343 // might only be written because it has an explicit parent added below). 344 splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p))); 345 346 // If we add an explicit parent locale, it forces the data to be written. This is 347 // where we check for forced overrides of the parent relationship (which is a per 348 // directory thing). 349 getIcuParent(id, parent, dir).ifPresent(p -> { 350 splitData.add(RB_PARENT, p); 351 graphMetadata.get(dir).addParent(id, p); 352 }); 353 354 if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) { 355 if (id.equals("root")) { 356 splitData.setVersion(cldrVersion); 357 } 358 write(splitData, outDir, false); 359 writtenLocaleIds.put(dir, id); 360 } 361 } 362 } 363 364 System.out.println("processing alias ldml files"); 365 for (IcuLocaleDir dir : splitDirs) { 366 Path outDir = baseDir.resolve(dir.getOutputDir()); 367 Set<String> targetIds = config.getTargetLocaleIds(dir); 368 DependencyGraph depGraph = graphMetadata.get(dir); 369 370 // TODO: Maybe calculate alias map directly into the dependency graph? 371 Map<String, String> aliasMap = getAliasMap(targetIds, dir); 372 aliasMap.forEach((s, t) -> { 373 depGraph.addAlias(s, t); 374 writeAliasFile(s, t, outDir); 375 // It's only important to record which alias files are written because of forced 376 // aliases, but since it's harmless otherwise, we just do it unconditionally. 
377 // Normal alias files don't affect the empty file calculation, but forced ones can. 378 writtenLocaleIds.put(dir, s); 379 }); 380 381 calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values()) 382 .forEach(id -> writeEmptyFile(id, outDir, aliasMap.values())); 383 384 writeDependencyGraph(outDir, depGraph); 385 } 386 } 387 388 389 private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%"); 390 391 // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type 392 // annotations (e.g. "languages:intvector"). We strip these when considering the element name. getBaseSegmentName(String segment)393 private static String getBaseSegmentName(String segment) { 394 int idx = PATH_MODIFIER.indexIn(segment); 395 return idx == -1 ? segment : segment.substring(0, idx); 396 } 397 398 /* 399 * There are four reasons for treating a locale ID as an alias. 400 * 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS"). 401 * 2: It has no CLDR data but is missing a script subtag. 402 * 3: It is one of the special "phantom" alias which cannot be represented normally 403 * and must be manually mapped (e.g. legacy locale IDs which don't even parse). 404 * 4: It is a "super special" forced alias, which might replace existing aliases in 405 * some output directories. 406 */ getAliasMap(Set<String> localeIds, IcuLocaleDir dir)407 private Map<String, String> getAliasMap(Set<String> localeIds, IcuLocaleDir dir) { 408 // Even forced aliases only apply if they are in the set of locale IDs for the directory. 409 Map<String, String> forcedAliases = 410 Maps.filterKeys(config.getForcedAliases(dir), localeIds::contains); 411 412 Map<String, String> aliasMap = new LinkedHashMap<>(); 413 for (String id : localeIds) { 414 if (forcedAliases.containsKey(id)) { 415 // Forced aliases will be added later and don't need to be processed here. This 416 // is especially necessary if the ID is not structurally valid (e.g. 
"no_NO_NY") 417 // since that cannot be processed by the code below. 418 continue; 419 } 420 String canonicalId = supplementalData.replaceDeprecatedTags(id); 421 if (!canonicalId.equals(id)) { 422 // If the canonical form of an ID differs from the requested ID, the this is an 423 // alias, and just needs to point to the canonical ID. 424 aliasMap.put(id, canonicalId); 425 continue; 426 } 427 if (availableIds.contains(id)) { 428 // If it's canonical and supported, it's not an alias. 429 continue; 430 } 431 // If the requested locale is not supported, maximize it and alias to that. 432 String maximizedId = supplementalData.maximize(id) 433 .orElseThrow(() -> new IllegalArgumentException("unsupported locale ID: " + id)); 434 // We can't alias to ourselves and we shouldn't be here is the ID was already maximal. 435 checkArgument(!maximizedId.equals(id), "unsupported maximized locale ID: %s", id); 436 aliasMap.put(id, maximizedId); 437 } 438 // Important that we overwrite entries which might already exist here, since we might have 439 // already calculated a "natural" alias for something that we want to force (and we should 440 // replace the existing target, since that affects how we determine empty files later). 441 aliasMap.putAll(forcedAliases); 442 return aliasMap; 443 } 444 445 /* 446 * Helper to determine the correct parent ID to be written into the ICU data file. The rules 447 * are: 448 * 1: If no forced parent exists (common) write the explicit parent (if that exists) 449 * 2: If a forced parent exists, but the forced value is what you would get by just truncating 450 * the current locale ID, write nothing (ICU libraries truncate when no parent is set). 451 * 3: Write the forced parent (this is an exceptional case, and may not even occur in data). 
452 */ getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir)453 private Optional<String> getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir) { 454 String forcedParentId = config.getForcedParents(dir).get(id); 455 if (forcedParentId == null) { 456 return parent; 457 } 458 return id.contains("_") && forcedParentId.regionMatches(0, id, 0, id.lastIndexOf('_')) 459 ? Optional.empty() : Optional.of(forcedParentId); 460 } 461 processSupplemental()462 private void processSupplemental() { 463 for (OutputType type : config.getOutputTypes()) { 464 if (type.getCldrType() == LDML) { 465 continue; 466 } 467 System.out.println("processing supplemental type " + type); 468 switch (type) { 469 case DAY_PERIODS: 470 write(DayPeriodsMapper.process(src), "misc"); 471 break; 472 473 case GENDER_LIST: 474 processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false); 475 break; 476 477 case LIKELY_SUBTAGS: 478 processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false); 479 break; 480 481 case SUPPLEMENTAL_DATA: 482 processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true); 483 break; 484 485 case UNITS: 486 processSupplemental("units", UNITS_DATA_PATHS, "misc", true); 487 break; 488 489 case CURRENCY_DATA: 490 processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", false); 491 break; 492 493 case GRAMMATICAL_FEATURES: 494 processSupplemental("grammaticalFeatures", GRAMMATICAL_FEATURES_PATHS, "misc", false); 495 break; 496 497 case METADATA: 498 processSupplemental("metadata", METADATA_PATHS, "misc", false); 499 break; 500 501 case META_ZONES: 502 processSupplemental("metaZones", METAZONE_PATHS, "misc", false); 503 break; 504 505 case NUMBERING_SYSTEMS: 506 processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false); 507 break; 508 509 case PLURALS: 510 write(PluralsMapper.process(src), "misc"); 511 break; 512 513 case PLURAL_RANGES: 514 write(PluralRangesMapper.process(src), "misc"); 
515 break; 516 517 case LOCALE_DISTANCE: 518 write(LocaleDistanceMapper.process(src), "misc"); 519 break; 520 521 case WINDOWS_ZONES: 522 processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false); 523 break; 524 525 case TRANSFORMS: 526 Path transformDir = createDirectory(config.getOutputDir().resolve("translit")); 527 write(TransformsMapper.process(src, transformDir, fileHeader), transformDir, false); 528 break; 529 530 case VERSION: 531 writeIcuVersionInfo(); 532 break; 533 534 case KEY_TYPE_DATA: 535 Bcp47Mapper.process(src).forEach(d -> write(d, "misc")); 536 break; 537 538 default: 539 throw new AssertionError("Unsupported supplemental type: " + type); 540 } 541 } 542 } 543 544 private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion"); 545 processSupplemental( String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion)546 private void processSupplemental( 547 String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) { 548 IcuData icuData = 549 SupplementalMapper.process(src, supplementalTransformer, label, paths); 550 // A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the 551 // supplemental data XML files. 552 if (addCldrVersion) { 553 // Not the same path as used by "setVersion()" 554 icuData.add(RB_CLDR_VERSION, config.getVersionInfo().getCldrVersion()); 555 } 556 write(icuData, dir); 557 } 558 writeAliasFile(String srcId, String destId, Path dir)559 private void writeAliasFile(String srcId, String destId, Path dir) { 560 IcuData icuData = new IcuData(srcId, true); 561 icuData.add(RB_ALIAS, destId); 562 // Allow overwrite for aliases since some are "forced" and overwrite existing targets. 563 // TODO: Maybe tighten this up so only forced aliases for existing targets are overwritten. 
564 write(icuData, dir, true); 565 } 566 writeEmptyFile(String id, Path dir, Collection<String> aliasTargets)567 private void writeEmptyFile(String id, Path dir, Collection<String> aliasTargets) { 568 IcuData icuData = new IcuData(id, true); 569 // TODO: Document the reason for this (i.e. why does it matter what goes into empty files?) 570 if (aliasTargets.contains(id)) { 571 icuData.setFileComment("generated alias target"); 572 icuData.add(RB_EMPTY_ALIAS, ""); 573 } else { 574 // These empty files only exist because the target of an alias has a parent locale 575 // which is itself not in the set of written ICU files. An "indirect alias target". 576 // No need to add data: Just write a resource bundle with an empty top-level table. 577 } 578 write(icuData, dir, false); 579 } 580 writeIcuVersionInfo()581 private void writeIcuVersionInfo() { 582 IcuVersionInfo versionInfo = config.getVersionInfo(); 583 IcuData versionData = new IcuData("icuver", false); 584 versionData.add(RbPath.of("ICUVersion"), versionInfo.getIcuVersion()); 585 versionData.add(RbPath.of("DataVersion"), versionInfo.getIcuDataVersion()); 586 versionData.add(RbPath.of("CLDRVersion"), versionInfo.getCldrVersion()); 587 // Write file via non-helper methods since we need to include a legacy copyright. 588 Path miscDir = config.getOutputDir().resolve("misc"); 589 createDirectory(miscDir); 590 ImmutableList<String> versionHeader = ImmutableList.<String>builder() 591 .addAll(fileHeader) 592 .add( 593 "***************************************************************************", 594 "*", 595 "* Copyright (C) 2010-2016 International Business Machines", 596 "* Corporation and others. All Rights Reserved.", 597 "*", 598 "***************************************************************************") 599 .build(); 600 IcuTextWriter.writeToFile(versionData, miscDir, versionHeader, false); 601 } 602 603 // Commonest case for writing data files in "normal" directories. 
write(IcuData icuData, String dir)604 private void write(IcuData icuData, String dir) { 605 write(icuData, config.getOutputDir().resolve(dir), false); 606 } 607 write(IcuData icuData, Path dir, boolean allowOverwrite)608 private void write(IcuData icuData, Path dir, boolean allowOverwrite) { 609 createDirectory(dir); 610 IcuTextWriter.writeToFile(icuData, dir, fileHeader, allowOverwrite); 611 } 612 createDirectory(Path dir)613 private Path createDirectory(Path dir) { 614 try { 615 Files.createDirectories(dir); 616 } catch (IOException e) { 617 throw new RuntimeException("cannot create directory: " + dir, e); 618 } 619 return dir; 620 } 621 writeDependencyGraph(Path dir, DependencyGraph depGraph)622 private void writeDependencyGraph(Path dir, DependencyGraph depGraph) { 623 createDirectory(dir); 624 try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8); 625 PrintWriter out = new PrintWriter(w)) { 626 depGraph.writeJsonTo(out, fileHeader); 627 out.flush(); 628 } catch (IOException e) { 629 throw new RuntimeException("cannot write dependency graph file: " + dir, e); 630 } 631 } 632 633 // The set of IDs to process is: 634 // * any file that was written 635 // * any alias target (not written) 636 // 637 // From which we generate the complete "closure" under the "getParent()" function. This set 638 // contains all file (written or not) which need to exist to complete the locale hierarchy. 639 // 640 // Then we remove all the written files to just leave the ones that need to be generated. 641 // This is a simple and robust approach that handles things like "gaps" in non-aliased 642 // locale IDs, where an intermediate parent is not present. 
calculateEmptyFiles( Set<String> writtenIds, Collection<String> aliasTargetIds)643 private ImmutableSet<String> calculateEmptyFiles( 644 Set<String> writtenIds, Collection<String> aliasTargetIds) { 645 646 Set<String> seedIds = new HashSet<>(writtenIds); 647 seedIds.addAll(aliasTargetIds); 648 // Be nice and sort the output (makes easier debugging). 649 Set<String> allIds = new TreeSet<>(); 650 for (String id : seedIds) { 651 while (!id.equals("root") && !allIds.contains(id)) { 652 allIds.add(id); 653 id = supplementalData.getParent(id); 654 } 655 } 656 return ImmutableSet.copyOf(Sets.difference(allIds, writtenIds)); 657 } 658 659 private static final ImmutableMap<String, IcuLocaleDir> LOCALE_SPLIT_INFO = 660 ImmutableMap.<String, IcuLocaleDir>builder() 661 // BRKITR 662 .put("boundaries", BRKITR) 663 .put("dictionaries", BRKITR) 664 .put("exceptions", BRKITR) 665 // COLL 666 .put("collations", COLL) 667 .put("depends", COLL) 668 .put("UCARules", COLL) 669 // CURR 670 .put("Currencies", CURR) 671 .put("CurrencyPlurals", CURR) 672 .put("CurrencyUnitPatterns", CURR) 673 .put("currencySpacing", CURR) 674 // LANG 675 .put("Keys", LANG) 676 .put("Languages", LANG) 677 .put("Scripts", LANG) 678 .put("Types", LANG) 679 .put("Variants", LANG) 680 .put("characterLabelPattern", LANG) 681 .put("codePatterns", LANG) 682 .put("localeDisplayPattern", LANG) 683 // RBNF 684 .put("RBNFRules", RBNF) 685 // REGION 686 .put("Countries", REGION) 687 // UNIT 688 .put("durationUnits", UNIT) 689 .put("units", UNIT) 690 .put("unitsShort", UNIT) 691 .put("unitsNarrow", UNIT) 692 // ZONE 693 .put("zoneStrings", ZONE) 694 .build(); 695 } 696