1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 package org.unicode.icu.tool.cldrtoicu; 4 5 import static com.google.common.base.Preconditions.checkArgument; 6 import static com.google.common.base.Preconditions.checkNotNull; 7 import static com.google.common.collect.ImmutableList.toImmutableList; 8 import static java.nio.charset.StandardCharsets.UTF_8; 9 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; 10 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; 11 import static org.unicode.cldr.api.CldrDataType.BCP47; 12 import static org.unicode.cldr.api.CldrDataType.LDML; 13 import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL; 14 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.BRKITR; 15 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.COLL; 16 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.CURR; 17 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LANG; 18 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LOCALES; 19 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RBNF; 20 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.REGION; 21 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT; 22 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE; 23 24 import java.io.BufferedWriter; 25 import java.io.IOException; 26 import java.io.InputStream; 27 import java.io.InputStreamReader; 28 import java.io.PrintWriter; 29 import java.nio.file.Files; 30 import java.nio.file.Path; 31 import java.util.*; 32 import java.util.function.Predicate; 33 import java.util.stream.Collectors; 34 import java.util.stream.Stream; 35 36 import org.unicode.cldr.api.CldrData; 37 import org.unicode.cldr.api.CldrDataSupplier; 38 import org.unicode.cldr.api.CldrDataType; 39 import org.unicode.cldr.api.CldrPath; 40 import org.unicode.cldr.api.PathMatcher; 41 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir; 42 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuVersionInfo; 43 import org.unicode.icu.tool.cldrtoicu.localedistance.LocaleDistanceMapper; 44 import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper; 45 import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper; 46 import org.unicode.icu.tool.cldrtoicu.mapper.CollationMapper; 47 import org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapper; 48 import org.unicode.icu.tool.cldrtoicu.mapper.LocaleMapper; 49 import org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapper; 50 import org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapper; 51 import org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapper; 52 import org.unicode.icu.tool.cldrtoicu.mapper.SupplementalMapper; 53 import org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapper; 54 import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer; 55 56 import com.google.common.base.CharMatcher; 57 import com.google.common.collect.HashMultimap; 58 import com.google.common.collect.ImmutableList; 59 import com.google.common.collect.ImmutableListMultimap; 60 import com.google.common.collect.ImmutableMap; 61 import com.google.common.collect.ImmutableSet; 62 import com.google.common.collect.LinkedListMultimap; 63 import com.google.common.collect.ListMultimap; 64 import com.google.common.collect.Maps; 65 import com.google.common.collect.SetMultimap; 66 import com.google.common.collect.Sets; 67 import com.google.common.io.CharStreams; 68 69 /** 70 * The main converter tool for CLDR to ICU data. To run this tool, you need to supply a suitable 71 * {@link LdmlConverterConfig} instance. There is a simple {@code main()} method available in this 72 * class which can be invoked passing just the desired output directory and which relies on the 73 * presence of several system properties for the remainder of its parameters: 74 * <ul> 75 * <li>CLDR_DIR: The root of the CLDR release from which CLDR data is read. 76 * <li>ICU_DIR: The root of the ICU release from which additional "specials" XML data is read. 77 * <li>CLDR_DTD_CACHE: A temporary directory with the various DTDs cached (this is a legacy 78 * requirement from the underlying CLDR libraries and might go away one day). 79 * </ul> 80 */ 81 public final class LdmlConverter { 82 // TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath). 83 private static final Predicate<CldrPath> GENDER_LIST_PATHS = 84 supplementalMatcher("gender"); 85 private static final Predicate<CldrPath> LIKELY_SUBTAGS_PATHS = 86 supplementalMatcher("likelySubtags"); 87 private static final Predicate<CldrPath> METAZONE_PATHS = 88 supplementalMatcher("metaZones", "primaryZones"); 89 private static final Predicate<CldrPath> METADATA_PATHS = 90 supplementalMatcher("metadata"); 91 private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS = 92 supplementalMatcher( 93 "calendarData", 94 "calendarPreferenceData", 95 "codeMappings", 96 "codeMappingsCurrency", 97 "idValidity", 98 "languageData", 99 "languageMatching", 100 "measurementData", 101 "parentLocales", 102 "personNamesDefaults", 103 "subdivisionContainment", 104 "territoryContainment", 105 "territoryInfo", 106 "timeData", 107 "weekData", 108 "weekOfPreference"); 109 private static final Predicate<CldrPath> CURRENCY_DATA_PATHS = 110 supplementalMatcher("currencyData"); 111 private static final Predicate<CldrPath> UNITS_DATA_PATHS = 112 supplementalMatcher( 113 "convertUnits", 114 "unitConstants", 115 "unitQuantities", 116 "unitPreferenceData"); 117 private static final Predicate<CldrPath> GRAMMATICAL_FEATURES_PATHS = 118 supplementalMatcher("grammaticalData"); 119 private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS = 120 supplementalMatcher("numberingSystems"); 121 private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS = 122 supplementalMatcher("windowsZones"); 123 supplementalMatcher(String... spec)124 private static Predicate<CldrPath> supplementalMatcher(String... spec) { 125 checkArgument(spec.length > 0, "must supply at least one matcher spec"); 126 if (spec.length == 1) { 127 return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf; 128 } 129 return 130 Arrays.stream(spec) 131 .map(s -> PathMatcher.of("//supplementalData/" + s)) 132 .map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf)) 133 .reduce(p -> false, Predicate::or); 134 } 135 136 private static RbPath RB_PARENT = RbPath.of("%%Parent"); 137 // The quotes below are only so we achieve parity with the manually written alias files. 138 // TODO: Remove unnecessary quotes once the migration to this code is complete. 139 private static RbPath RB_ALIAS = RbPath.of("\"%%ALIAS\""); 140 // Special path for adding to empty files which only exist to complete the parent chain. 141 // TODO: Confirm that this has no meaningful effect and unify "empty" file contents. 142 private static RbPath RB_EMPTY_ALIAS = RbPath.of("___"); 143 144 /** 145 * Output types defining specific subsets of the ICU data which can be converted separately. 146 * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to 147 * hide what are essentially implementation specific data splits. 148 */ 149 public enum OutputType { 150 LOCALES(LDML), 151 BRKITR(LDML), 152 COLL(LDML), 153 RBNF(LDML), 154 DAY_PERIODS(SUPPLEMENTAL), 155 GENDER_LIST(SUPPLEMENTAL), 156 LIKELY_SUBTAGS(SUPPLEMENTAL), 157 SUPPLEMENTAL_DATA(SUPPLEMENTAL), 158 UNITS(SUPPLEMENTAL), 159 CURRENCY_DATA(SUPPLEMENTAL), 160 GRAMMATICAL_FEATURES(SUPPLEMENTAL), 161 METADATA(SUPPLEMENTAL), 162 META_ZONES(SUPPLEMENTAL), 163 NUMBERING_SYSTEMS(SUPPLEMENTAL), 164 PLURALS(SUPPLEMENTAL), 165 PLURAL_RANGES(SUPPLEMENTAL), 166 WINDOWS_ZONES(SUPPLEMENTAL), 167 TRANSFORMS(SUPPLEMENTAL), 168 LOCALE_DISTANCE(SUPPLEMENTAL), 169 VERSION(SUPPLEMENTAL), 170 KEY_TYPE_DATA(BCP47); 171 172 public static final ImmutableSet<OutputType> ALL = ImmutableSet.copyOf(OutputType.values()); 173 174 private final CldrDataType type; 175 OutputType(CldrDataType type)176 OutputType(CldrDataType type) { 177 this.type = checkNotNull(type); 178 } 179 getCldrType()180 CldrDataType getCldrType() { 181 return type; 182 } 183 } 184 185 // Map to convert the rather arbitrarily defined "output types" to the directories into which 186 // the data is written. This is only for "LDML" types since other mappers don't need to split 187 // data into multiple directories. 188 private static final ImmutableListMultimap<OutputType, IcuLocaleDir> TYPE_TO_DIR = 189 ImmutableListMultimap.<OutputType, IcuLocaleDir>builder() 190 .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE) 191 .putAll(OutputType.BRKITR, BRKITR) 192 .putAll(OutputType.COLL, COLL) 193 .putAll(OutputType.RBNF, RBNF) 194 .build(); 195 196 /** Converts CLDR data according to the given configuration. */ convert( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)197 public static void convert( 198 CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) { 199 new LdmlConverter(src, supplementalData, config).convertAll(); 200 } 201 202 // The supplier for all data to be converted. 203 private final CldrDataSupplier src; 204 // Supplemental data available to mappers if needed. 205 private final SupplementalData supplementalData; 206 // The configuration controlling conversion behaviour. 207 private final LdmlConverterConfig config; 208 // The set of expanded target locale IDs. 209 // TODO: Make available IDs include specials files (or fail if specials are not available). 210 private final ImmutableSet<String> availableIds; 211 // Transformer for locale data. 212 private final PathValueTransformer localeTransformer; 213 // Transformer for supplemental data. 214 private final PathValueTransformer supplementalTransformer; 215 // Header string to go into every ICU data and transliteration rule file (comment prefixes 216 // are not present and must be added by the code writing the file). 217 private final ImmutableList<String> fileHeader; 218 LdmlConverter( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)219 private LdmlConverter( 220 CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) { 221 this.src = checkNotNull(src); 222 this.supplementalData = checkNotNull(supplementalData); 223 this.config = checkNotNull(config); 224 this.availableIds = ImmutableSet.copyOf( 225 Sets.intersection(supplementalData.getAvailableLocaleIds(), config.getAllLocaleIds())); 226 // Load the remaining path value transformers. 227 this.supplementalTransformer = 228 RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_supplemental.txt"), 229 IcuFunctions.ALGORITHM_FN, 230 IcuFunctions.DATE_FN, 231 IcuFunctions.DAY_NUMBER_FN, 232 IcuFunctions.EXP_FN, 233 IcuFunctions.YMD_FN); 234 this.localeTransformer = 235 RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_locale.txt"), 236 IcuFunctions.CONTEXT_TRANSFORM_INDEX_FN); 237 this.fileHeader = readLinesFromResource("/ldml2icu_header.txt"); 238 } 239 convertAll()240 private void convertAll() { 241 processLdml(); 242 processSupplemental(); 243 if (config.emitReport()) { 244 System.out.println("Supplemental Data Transformer=" + supplementalTransformer); 245 System.out.println("Locale Data Transformer=" + localeTransformer); 246 } 247 } 248 readLinesFromResource(String name)249 private static ImmutableList<String> readLinesFromResource(String name) { 250 try (InputStream in = LdmlConverter.class.getResourceAsStream(name)) { 251 return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8))); 252 } catch (IOException e) { 253 throw new RuntimeException("cannot read resource: " + name, e); 254 } 255 } 256 loadSpecialsData(String localeId)257 private Optional<CldrData> loadSpecialsData(String localeId) { 258 String expected = localeId + ".xml"; 259 try (Stream<Path> files = Files.walk(config.getSpecialsDir())) { 260 Set<Path> xmlFiles = files 261 .filter(Files::isRegularFile) 262 .filter(f -> f.getFileName().toString().equals(expected)) 263 .collect(Collectors.toSet()); 264 return !xmlFiles.isEmpty() 265 ? Optional.of( 266 CldrDataSupplier.forCldrFiles(LDML, config.getMinimumDraftStatus(), xmlFiles)) 267 : Optional.empty(); 268 } catch (IOException e) { 269 throw new RuntimeException( 270 "error processing specials directory: " + config.getSpecialsDir(), e); 271 } 272 } 273 processLdml()274 private void processLdml() { 275 ImmutableList<IcuLocaleDir> splitDirs = 276 config.getOutputTypes().stream() 277 .filter(t -> t.getCldrType() == LDML) 278 .flatMap(t -> TYPE_TO_DIR.get(t).stream()) 279 .collect(toImmutableList()); 280 if (splitDirs.isEmpty()) { 281 return; 282 } 283 284 String cldrVersion = config.getVersionInfo().getCldrVersion(); 285 286 Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>(); 287 splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion))); 288 289 SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create(); 290 Path baseDir = config.getOutputDir(); 291 292 System.out.println("processing standard ldml files"); 293 for (String id : config.getAllLocaleIds()) { 294 // Skip "target" IDs that are aliases (they are handled later). 295 if (!availableIds.contains(id)) { 296 continue; 297 } 298 // TODO: Remove the following skip when ICU-20997 is fixed 299 if (id.contains("VALENCIA") || id.contains("TARASK")) { 300 System.out.println("(skipping " + id + " until ICU-20997 is fixed)"); 301 continue; 302 } 303 304 IcuData icuData = new IcuData(id, true); 305 306 Optional<CldrData> specials = loadSpecialsData(id); 307 CldrData unresolved = src.getDataForLocale(id, UNRESOLVED); 308 309 BreakIteratorMapper.process(icuData, unresolved, specials); 310 CollationMapper.process(icuData, unresolved, specials, cldrVersion); 311 RbnfMapper.process(icuData, unresolved, specials); 312 313 CldrData resolved = src.getDataForLocale(id, RESOLVED); 314 Optional<String> defaultCalendar = supplementalData.getDefaultCalendar(id); 315 LocaleMapper.process( 316 icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar); 317 318 ListMultimap<IcuLocaleDir, RbPath> splitPaths = LinkedListMultimap.create(); 319 for (RbPath p : icuData.getPaths()) { 320 String rootName = getBaseSegmentName(p.getSegment(0)); 321 splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p); 322 } 323 324 Optional<String> parent = supplementalData.getExplicitParentLocaleOf(id); 325 // We always write base languages (even if empty). 326 boolean isBaseLanguage = !id.contains("_"); 327 // Run through all directories (not just the keySet() of the split path map) since we 328 // sometimes write empty files. 329 for (IcuLocaleDir dir : splitDirs) { 330 Set<String> targetIds = config.getTargetLocaleIds(dir); 331 if (!targetIds.contains(id)) { 332 if (!splitPaths.get(dir).isEmpty()) { 333 System.out.format( 334 "target IDs for %s does not contain %s, but it has data: %s\n", 335 dir, id, splitPaths.get(dir)); 336 } 337 continue; 338 } 339 340 Path outDir = baseDir.resolve(dir.getOutputDir()); 341 IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback()); 342 343 // The split data can still be empty for this directory, but that's expected (it 344 // might only be written because it has an explicit parent added below). 345 splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p))); 346 347 // If we add an explicit parent locale, it forces the data to be written. This is 348 // where we check for forced overrides of the parent relationship (which is a per 349 // directory thing). 350 getIcuParent(id, parent, dir).ifPresent(p -> { 351 splitData.add(RB_PARENT, p); 352 graphMetadata.get(dir).addParent(id, p); 353 }); 354 355 if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) { 356 if (id.equals("root")) { 357 splitData.setVersion(cldrVersion); 358 } 359 write(splitData, outDir, false); 360 writtenLocaleIds.put(dir, id); 361 } 362 } 363 } 364 365 System.out.println("processing alias ldml files"); 366 for (IcuLocaleDir dir : splitDirs) { 367 Path outDir = baseDir.resolve(dir.getOutputDir()); 368 Set<String> targetIds = config.getTargetLocaleIds(dir); 369 DependencyGraph depGraph = graphMetadata.get(dir); 370 371 // TODO: Maybe calculate alias map directly into the dependency graph? 372 Map<String, String> aliasMap = getAliasMap(targetIds, dir); 373 aliasMap.forEach((s, t) -> { 374 depGraph.addAlias(s, t); 375 writeAliasFile(s, t, outDir); 376 // It's only important to record which alias files are written because of forced 377 // aliases, but since it's harmless otherwise, we just do it unconditionally. 378 // Normal alias files don't affect the empty file calculation, but forced ones can. 379 writtenLocaleIds.put(dir, s); 380 }); 381 382 calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values()) 383 .forEach(id -> writeEmptyFile(id, outDir, aliasMap.values())); 384 385 writeDependencyGraph(outDir, depGraph); 386 } 387 } 388 389 390 private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%"); 391 392 // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type 393 // annotations (e.g. "languages:intvector"). We strip these when considering the element name. getBaseSegmentName(String segment)394 private static String getBaseSegmentName(String segment) { 395 int idx = PATH_MODIFIER.indexIn(segment); 396 return idx == -1 ? segment : segment.substring(0, idx); 397 } 398 399 /* 400 * There are four reasons for treating a locale ID as an alias. 401 * 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS"). 402 * 2: It has no CLDR data but is missing a script subtag. 403 * 3: It is one of the special "phantom" alias which cannot be represented normally 404 * and must be manually mapped (e.g. legacy locale IDs which don't even parse). 405 * 4: It is a "super special" forced alias, which might replace existing aliases in 406 * some output directories. 407 */ getAliasMap(Set<String> localeIds, IcuLocaleDir dir)408 private Map<String, String> getAliasMap(Set<String> localeIds, IcuLocaleDir dir) { 409 // Even forced aliases only apply if they are in the set of locale IDs for the directory. 410 Map<String, String> forcedAliases = 411 Maps.filterKeys(config.getForcedAliases(dir), localeIds::contains); 412 413 Map<String, String> aliasMap = new LinkedHashMap<>(); 414 for (String id : localeIds) { 415 if (forcedAliases.containsKey(id)) { 416 // Forced aliases will be added later and don't need to be processed here. This 417 // is especially necessary if the ID is not structurally valid (e.g. "no_NO_NY") 418 // since that cannot be processed by the code below. 419 continue; 420 } 421 String canonicalId = supplementalData.replaceDeprecatedTags(id); 422 if (!canonicalId.equals(id)) { 423 // If the canonical form of an ID differs from the requested ID, the this is an 424 // alias, and just needs to point to the canonical ID. 425 aliasMap.put(id, canonicalId); 426 continue; 427 } 428 if (availableIds.contains(id)) { 429 // If it's canonical and supported, it's not an alias. 430 continue; 431 } 432 // If the requested locale is not supported, maximize it and alias to that. 433 String maximizedId = supplementalData.maximize(id) 434 .orElseThrow(() -> new IllegalArgumentException("unsupported locale ID: " + id)); 435 // We can't alias to ourselves and we shouldn't be here is the ID was already maximal. 436 checkArgument(!maximizedId.equals(id), "unsupported maximized locale ID: %s", id); 437 aliasMap.put(id, maximizedId); 438 } 439 // Important that we overwrite entries which might already exist here, since we might have 440 // already calculated a "natural" alias for something that we want to force (and we should 441 // replace the existing target, since that affects how we determine empty files later). 442 aliasMap.putAll(forcedAliases); 443 return aliasMap; 444 } 445 446 /* 447 * Helper to determine the correct parent ID to be written into the ICU data file. The rules 448 * are: 449 * 1: If no forced parent exists (common) write the explicit parent (if that exists) 450 * 2: If a forced parent exists, but the forced value is what you would get by just truncating 451 * the current locale ID, write nothing (ICU libraries truncate when no parent is set). 452 * 3: Write the forced parent (this is an exceptional case, and may not even occur in data). 453 */ getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir)454 private Optional<String> getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir) { 455 String forcedParentId = config.getForcedParents(dir).get(id); 456 if (forcedParentId == null) { 457 return parent; 458 } 459 return id.contains("_") && forcedParentId.regionMatches(0, id, 0, id.lastIndexOf('_')) 460 ? Optional.empty() : Optional.of(forcedParentId); 461 } 462 processSupplemental()463 private void processSupplemental() { 464 for (OutputType type : config.getOutputTypes()) { 465 if (type.getCldrType() == LDML) { 466 continue; 467 } 468 System.out.println("processing supplemental type " + type); 469 switch (type) { 470 case DAY_PERIODS: 471 write(DayPeriodsMapper.process(src), "misc"); 472 break; 473 474 case GENDER_LIST: 475 processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false); 476 break; 477 478 case LIKELY_SUBTAGS: 479 processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false); 480 break; 481 482 case SUPPLEMENTAL_DATA: 483 processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true); 484 break; 485 486 case UNITS: 487 processSupplemental("units", UNITS_DATA_PATHS, "misc", true); 488 break; 489 490 case CURRENCY_DATA: 491 processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", false); 492 break; 493 494 case GRAMMATICAL_FEATURES: 495 processSupplemental("grammaticalFeatures", GRAMMATICAL_FEATURES_PATHS, "misc", false); 496 break; 497 498 case METADATA: 499 processSupplemental("metadata", METADATA_PATHS, "misc", false); 500 break; 501 502 case META_ZONES: 503 processSupplemental("metaZones", METAZONE_PATHS, "misc", false); 504 break; 505 506 case NUMBERING_SYSTEMS: 507 processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false); 508 break; 509 510 case PLURALS: 511 write(PluralsMapper.process(src), "misc"); 512 break; 513 514 case PLURAL_RANGES: 515 write(PluralRangesMapper.process(src), "misc"); 516 break; 517 518 case LOCALE_DISTANCE: 519 write(LocaleDistanceMapper.process(src), "misc"); 520 break; 521 522 case WINDOWS_ZONES: 523 processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false); 524 break; 525 526 case TRANSFORMS: 527 Path transformDir = createDirectory(config.getOutputDir().resolve("translit")); 528 write(TransformsMapper.process(src, transformDir, fileHeader), transformDir, false); 529 break; 530 531 case VERSION: 532 writeIcuVersionInfo(); 533 break; 534 535 case KEY_TYPE_DATA: 536 Bcp47Mapper.process(src).forEach(d -> write(d, "misc")); 537 break; 538 539 default: 540 throw new AssertionError("Unsupported supplemental type: " + type); 541 } 542 } 543 } 544 545 private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion"); 546 processSupplemental( String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion)547 private void processSupplemental( 548 String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) { 549 IcuData icuData = 550 SupplementalMapper.process(src, supplementalTransformer, label, paths); 551 // A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the 552 // supplemental data XML files. 553 if (addCldrVersion) { 554 // Not the same path as used by "setVersion()" 555 icuData.add(RB_CLDR_VERSION, config.getVersionInfo().getCldrVersion()); 556 } 557 write(icuData, dir); 558 } 559 writeAliasFile(String srcId, String destId, Path dir)560 private void writeAliasFile(String srcId, String destId, Path dir) { 561 IcuData icuData = new IcuData(srcId, true); 562 icuData.add(RB_ALIAS, destId); 563 // Allow overwrite for aliases since some are "forced" and overwrite existing targets. 564 // TODO: Maybe tighten this up so only forced aliases for existing targets are overwritten. 565 write(icuData, dir, true); 566 } 567 writeEmptyFile(String id, Path dir, Collection<String> aliasTargets)568 private void writeEmptyFile(String id, Path dir, Collection<String> aliasTargets) { 569 IcuData icuData = new IcuData(id, true); 570 // TODO: Document the reason for this (i.e. why does it matter what goes into empty files?) 571 if (aliasTargets.contains(id)) { 572 icuData.setFileComment("generated alias target"); 573 icuData.add(RB_EMPTY_ALIAS, ""); 574 } else { 575 // These empty files only exist because the target of an alias has a parent locale 576 // which is itself not in the set of written ICU files. An "indirect alias target". 577 // No need to add data: Just write a resource bundle with an empty top-level table. 578 } 579 write(icuData, dir, false); 580 } 581 writeIcuVersionInfo()582 private void writeIcuVersionInfo() { 583 IcuVersionInfo versionInfo = config.getVersionInfo(); 584 IcuData versionData = new IcuData("icuver", false); 585 versionData.add(RbPath.of("ICUVersion"), versionInfo.getIcuVersion()); 586 versionData.add(RbPath.of("DataVersion"), versionInfo.getIcuDataVersion()); 587 versionData.add(RbPath.of("CLDRVersion"), versionInfo.getCldrVersion()); 588 // Write file via non-helper methods since we need to include a legacy copyright. 589 Path miscDir = config.getOutputDir().resolve("misc"); 590 createDirectory(miscDir); 591 ImmutableList<String> versionHeader = ImmutableList.<String>builder() 592 .addAll(fileHeader) 593 .add( 594 "***************************************************************************", 595 "*", 596 "* Copyright (C) 2010-2016 International Business Machines", 597 "* Corporation and others. All Rights Reserved.", 598 "*", 599 "***************************************************************************") 600 .build(); 601 IcuTextWriter.writeToFile(versionData, miscDir, versionHeader, false); 602 } 603 604 // Commonest case for writing data files in "normal" directories. write(IcuData icuData, String dir)605 private void write(IcuData icuData, String dir) { 606 write(icuData, config.getOutputDir().resolve(dir), false); 607 } 608 write(IcuData icuData, Path dir, boolean allowOverwrite)609 private void write(IcuData icuData, Path dir, boolean allowOverwrite) { 610 createDirectory(dir); 611 IcuTextWriter.writeToFile(icuData, dir, fileHeader, allowOverwrite); 612 } 613 createDirectory(Path dir)614 private Path createDirectory(Path dir) { 615 try { 616 Files.createDirectories(dir); 617 } catch (IOException e) { 618 throw new RuntimeException("cannot create directory: " + dir, e); 619 } 620 return dir; 621 } 622 writeDependencyGraph(Path dir, DependencyGraph depGraph)623 private void writeDependencyGraph(Path dir, DependencyGraph depGraph) { 624 createDirectory(dir); 625 try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8); 626 PrintWriter out = new PrintWriter(w)) { 627 depGraph.writeJsonTo(out, fileHeader); 628 out.flush(); 629 } catch (IOException e) { 630 throw new RuntimeException("cannot write dependency graph file: " + dir, e); 631 } 632 } 633 634 // The set of IDs to process is: 635 // * any file that was written 636 // * any alias target (not written) 637 // 638 // From which we generate the complete "closure" under the "getParent()" function. This set 639 // contains all file (written or not) which need to exist to complete the locale hierarchy. 640 // 641 // Then we remove all the written files to just leave the ones that need to be generated. 642 // This is a simple and robust approach that handles things like "gaps" in non-aliased 643 // locale IDs, where an intermediate parent is not present. calculateEmptyFiles( Set<String> writtenIds, Collection<String> aliasTargetIds)644 private ImmutableSet<String> calculateEmptyFiles( 645 Set<String> writtenIds, Collection<String> aliasTargetIds) { 646 647 Set<String> seedIds = new HashSet<>(writtenIds); 648 seedIds.addAll(aliasTargetIds); 649 // Be nice and sort the output (makes easier debugging). 650 Set<String> allIds = new TreeSet<>(); 651 for (String id : seedIds) { 652 while (!id.equals("root") && !allIds.contains(id)) { 653 allIds.add(id); 654 id = supplementalData.getParent(id); 655 } 656 } 657 return ImmutableSet.copyOf(Sets.difference(allIds, writtenIds)); 658 } 659 660 private static final ImmutableMap<String, IcuLocaleDir> LOCALE_SPLIT_INFO = 661 ImmutableMap.<String, IcuLocaleDir>builder() 662 // BRKITR 663 .put("boundaries", BRKITR) 664 .put("dictionaries", BRKITR) 665 .put("exceptions", BRKITR) 666 .put("extensions", BRKITR) 667 .put("lstm", BRKITR) 668 // COLL 669 .put("collations", COLL) 670 .put("depends", COLL) 671 .put("UCARules", COLL) 672 // CURR 673 .put("Currencies", CURR) 674 .put("CurrencyPlurals", CURR) 675 .put("CurrencyUnitPatterns", CURR) 676 .put("currencySpacing", CURR) 677 // LANG 678 .put("Keys", LANG) 679 .put("Languages", LANG) 680 .put("Scripts", LANG) 681 .put("Types", LANG) 682 .put("Variants", LANG) 683 .put("characterLabelPattern", LANG) 684 .put("codePatterns", LANG) 685 .put("localeDisplayPattern", LANG) 686 // RBNF 687 .put("RBNFRules", RBNF) 688 // REGION 689 .put("Countries", REGION) 690 // UNIT 691 .put("durationUnits", UNIT) 692 .put("units", UNIT) 693 .put("unitsShort", UNIT) 694 .put("unitsNarrow", UNIT) 695 // ZONE 696 .put("zoneStrings", ZONE) 697 .build(); 698 } 699