1 package org.unicode.cldr.icu; 2 3 import java.io.BufferedReader; 4 import java.io.File; 5 import java.io.IOException; 6 import java.util.ArrayList; 7 import java.util.Arrays; 8 import java.util.HashMap; 9 import java.util.HashSet; 10 import java.util.Iterator; 11 import java.util.List; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Set; 15 import java.util.regex.Pattern; 16 17 import org.unicode.cldr.ant.CLDRConverterTool; 18 import org.unicode.cldr.icu.ResourceSplitter.SplitInfo; 19 import org.unicode.cldr.tool.Option; 20 import org.unicode.cldr.tool.Option.Options; 21 import org.unicode.cldr.util.CLDRFile.DraftStatus; 22 import org.unicode.cldr.util.Factory; 23 import org.unicode.cldr.util.FileReaders; 24 import org.unicode.cldr.util.PatternCache; 25 import org.unicode.cldr.util.SupplementalDataInfo; 26 27 /** 28 * Simpler mechanism for converting CLDR data to ICU Resource Bundles, intended 29 * to replace LDML2ICUConverter. The format is almost entirely data-driven 30 * instead of having lots of special-case code. 31 * 32 * The flags used to specify the data to be generated are copied directly from 33 * LDML2ICUConverter. 34 * 35 * Unlike the instructions in CLDRConverterTool, this converter does not invoke 36 * computeConvertibleXPaths to check if each xpath is convertible because the 37 * xpaths that are convertible have already been filtered out by the regex lookups. 38 * It may make more sense down the road to refactor CLDRConverterTool such that 39 * this class doesn't inherit unnecessary functionality. 40 * 41 * A rough overview of the new converter is available at 42 * https://sites.google.com/site/cldr/development/coding-cldr-tools/newldml2icuconverter 43 * 44 * @author jchye 45 */ 46 public class NewLdml2IcuConverter extends CLDRConverterTool { 47 private static final String ALIAS_PATH = "/\"%%ALIAS\""; 48 49 static final boolean DEBUG = true; 50 51 static final Pattern SEMI = PatternCache.get("\\s*+;\\s*+"); 52 53 /* 54 * The type of file to be converted. 55 */ 56 enum Type { 57 locales, dayPeriods, genderList, likelySubtags, metadata, metaZones, numberingSystems, plurals, pluralRanges, postalCodeData, rgScope, supplementalData, windowsZones, keyTypeData, brkitr, collation, rbnf; 58 } 59 60 private static final Options options = new Options( 61 "Usage: LDML2ICUConverter [OPTIONS] [FILES]\n" + 62 "This program is used to convert LDML files to ICU data text files.\n" + 63 "Please refer to the following options. Options are not case sensitive.\n" + 64 "\texample: org.unicode.cldr.icu.Ldml2IcuConverter -s xxx -d yyy en") 65 .add("sourcedir", ".*", "Source directory for CLDR files") 66 .add("destdir", ".*", ".", "Destination directory for output files, defaults to the current directory") 67 .add("specialsdir", 'p', ".*", null, "Source directory for files containing special data, if any") 68 .add("supplementaldir", 'm', ".*", null, "The supplemental data directory") 69 .add("keeptogether", 'k', null, null, 70 "Write locale data to one file instead of splitting into separate directories. For debugging") 71 .add("type", 't', "\\w+", null, "The type of file to be generated") 72 .add("xpath", 'x', ".*", null, "An optional xpath to debug the regexes with") 73 .add("filter", 'f', null, null, "Perform filtering on the locale data to be converted.") 74 .add("organization", 'o', ".*", null, "The organization to filter the data for") 75 .add("makefile", 'g', ".*", null, "If set, generates makefiles and alias files for the specified type. " + 76 "The value to set should be the name of the makefile.") 77 .add("verbose", 'v', null, null, "Debugging aids"); 78 79 private static final String LOCALES_DIR = "locales"; 80 81 private boolean keepTogether = false; 82 private Map<String, String> dirMapping; 83 private Set<String> allDirs; 84 private String sourceDir; 85 private String destinationDir; 86 private String supplementalDir; 87 private IcuDataSplitter splitter; 88 private Filter filter; 89 private boolean verbose = false; 90 91 /** 92 * Maps ICU paths to the directories they should end up in. 93 */ getDirMapping()94 private Map<String, String> getDirMapping() { 95 if (dirMapping == null) { 96 dirMapping = loadMapFromFile("ldml2icu_dir_mapping.txt"); 97 allDirs = new HashSet<String>(dirMapping.values()); 98 allDirs.remove("*"); 99 allDirs.add(LOCALES_DIR); 100 } 101 return dirMapping; 102 } 103 loadMapFromFile(String filename)104 private static Map<String, String> loadMapFromFile(String filename) { 105 Map<String, String> map = new HashMap<String, String>(); 106 BufferedReader reader = FileReaders.openFile(NewLdml2IcuConverter.class, filename); 107 String line; 108 try { 109 int lineNum = 1; 110 while ((line = reader.readLine()) != null) { 111 if (line.length() == 0 || line.startsWith("#")) continue; 112 String[] content = line.split(SEMI.toString()); 113 if (content.length != 2) { 114 throw new IllegalArgumentException("Invalid syntax of " + filename + " at line " + lineNum); 115 } 116 map.put(content[0], content[1]); 117 lineNum++; 118 } 119 } catch (IOException e) { 120 System.err.println("Failed to read fallback file."); 121 e.printStackTrace(); 122 } 123 return map; 124 } 125 loadSplitInfoFromFile()126 private List<SplitInfo> loadSplitInfoFromFile() { 127 Map<String, String> dirMapping = getDirMapping(); 128 List<SplitInfo> splitInfos = new ArrayList<SplitInfo>(); 129 for (Entry<String, String> entry : dirMapping.entrySet()) { 130 SplitInfo splitInfo = new SplitInfo(entry.getKey(), entry.getValue()); 131 splitInfos.add(splitInfo); 132 } 133 return splitInfos; 134 } 135 136 @Override processArgs(String[] args)137 public void processArgs(String[] args) { 138 Set<String> extraArgs = options.parse(args, true); 139 // For supplemental output files, the supplemental directory is specified 140 // as the source directory and the supplemental directory argument is 141 // not required. 142 if (!options.get("sourcedir").doesOccur()) { 143 throw new IllegalArgumentException("Source directory must be specified."); 144 } 145 sourceDir = options.get("sourcedir").getValue(); 146 supplementalDir = options.get("supplementaldir").getValue(); 147 148 destinationDir = options.get("destdir").getValue(); 149 if (!options.get("type").doesOccur()) { 150 throw new IllegalArgumentException("Type not specified: " + Arrays.asList(Type.values())); 151 } 152 Type type = Type.valueOf(options.get("type").getValue()); 153 keepTogether = options.get("keeptogether").doesOccur(); 154 if (!keepTogether && type == Type.supplementalData || type == Type.locales) { 155 if (splitInfos == null) { 156 splitInfos = loadSplitInfoFromFile(); 157 } 158 splitter = IcuDataSplitter.make(destinationDir, splitInfos); 159 } 160 161 verbose = options.get("verbose").doesOccur(); 162 163 String debugXPath = options.get("xpath").getValue(); 164 // Quotes are stripped out at the command line so add them back in. 165 if (debugXPath != null) { 166 debugXPath = debugXPath.replaceAll("=([^\\]\"]++)\\]", "=\"$1\"\\]"); 167 } 168 169 Factory specialFactory = null; 170 File specialsDir = null; 171 Option option = options.get("specialsdir"); 172 if (option.doesOccur()) { 173 if (type == Type.rbnf) { 174 specialsDir = new File(option.getValue()); 175 } else { 176 specialFactory = Factory.make(option.getValue(), ".*"); 177 } 178 } else if (type == Type.brkitr) { 179 specialFactory = Factory.make(options.get("specialsdir").getValue(), ".*"); 180 } 181 182 // Get list of locales if defined. 183 Set<String> includedLocales = getIncludedLocales(); 184 Map<String, String> localesMap = getLocalesMap(); 185 if (includedLocales != null && includedLocales.size() > 0) { 186 final Set<String> locales = new HashSet<String>(); 187 for (String locale : includedLocales) { 188 if (localesMap.containsKey(locale + ".xml")) { 189 locales.add(locale); 190 } 191 } 192 193 filter = new Filter() { 194 @Override 195 public boolean includes(String value) { 196 return locales.contains(value); 197 } 198 }; 199 } else if (extraArgs.size() > 0) { 200 final String regex = extraArgs.iterator().next(); 201 filter = new Filter() { 202 @Override 203 public boolean includes(String value) { 204 return value.matches(regex); 205 } 206 }; 207 } else if (type == Type.locales || type == Type.collation) { 208 throw new IllegalArgumentException( 209 "Missing locale list. Please provide a list of locales or a regex."); 210 } else { 211 filter = new Filter() { 212 @Override 213 public boolean includes(String value) { 214 return true; 215 } 216 }; 217 } 218 219 // Process files. 220 Mapper mapper = null; 221 switch (type) { 222 case locales: 223 // Generate locale data. 224 SupplementalDataInfo supplementalDataInfo = null; 225 option = options.get("supplementaldir"); 226 if (option.doesOccur()) { 227 supplementalDataInfo = SupplementalDataInfo.getInstance(supplementalDir); 228 } else { 229 throw new IllegalArgumentException("Supplemental directory must be specified with -s"); 230 } 231 232 Factory factory = Factory.make(sourceDir, ".*", DraftStatus.contributed); 233 String organization = options.get("organization").getValue(); 234 LocaleMapper localeMapper = new LocaleMapper(factory, specialFactory, 235 supplementalDataInfo, options.get("filter").doesOccur(), organization); 236 localeMapper.setDebugXPath(debugXPath); 237 mapper = localeMapper; 238 break; 239 case keyTypeData: 240 processBcp47Data(); 241 break; 242 case brkitr: 243 mapper = new BreakIteratorMapper(sourceDir, specialFactory); 244 break; 245 case collation: 246 mapper = new CollationMapper(sourceDir, specialFactory); 247 break; 248 case rbnf: 249 mapper = new RbnfMapper(new File(sourceDir), specialsDir); 250 break; 251 default: // supplemental data 252 processSupplemental(type, debugXPath); 253 } 254 255 if (mapper != null) { 256 convert(mapper); 257 option = options.get("makefile"); 258 if (option.doesOccur()) { 259 generateMakefile(mapper, option.getValue()); 260 } 261 } 262 } 263 processBcp47Data()264 private void processBcp47Data() { 265 Bcp47Mapper mapper = new Bcp47Mapper(sourceDir); 266 IcuData[] icuData = mapper.fillFromCldr(); 267 for (IcuData data : icuData) { 268 writeIcuData(data, destinationDir); 269 } 270 } 271 processSupplemental(Type type, String debugXPath)272 private void processSupplemental(Type type, String debugXPath) { 273 IcuData icuData; 274 // Use the supplementaldir if explicitly specified , otherwise the source dir. 275 String dir = options.get("supplementaldir").doesOccur() ? supplementalDir : sourceDir; 276 switch (type) { 277 case plurals: { 278 PluralsMapper mapper = new PluralsMapper(dir); 279 icuData = mapper.fillFromCldr(); 280 break; 281 } 282 case pluralRanges: { 283 PluralRangesMapper mapper = new PluralRangesMapper(dir); 284 icuData = mapper.fillFromCldr(); 285 break; 286 } 287 case dayPeriods: { 288 DayPeriodsMapper mapper = new DayPeriodsMapper(dir); 289 icuData = mapper.fillFromCldr(); 290 break; 291 } 292 default: { 293 SupplementalMapper mapper = SupplementalMapper.create(dir); 294 if (debugXPath != null) { 295 mapper.setDebugXPath(debugXPath); 296 } 297 icuData = mapper.fillFromCldr(type.toString()); 298 } 299 } 300 writeIcuData(icuData, destinationDir); 301 } 302 303 /** 304 * Writes the given IcuData object to file. 305 * 306 * @param icuData 307 * the IcuData object to be written 308 * @param outputDir 309 * the destination directory of the output file 310 */ writeIcuData(IcuData icuData, String outputDir)311 private void writeIcuData(IcuData icuData, String outputDir) { 312 if (icuData.keySet().size() == 0) { 313 throw new RuntimeException(icuData.getName() + " was not written because no data was generated."); 314 } 315 try { 316 // Split data into different directories if necessary. 317 // splitInfos is filled from the <remap> element in ICU's build.xml. 318 if (splitter == null) { 319 IcuTextWriter.writeToFile(icuData, outputDir); 320 } else { 321 String fallbackDir = new File(outputDir).getName(); 322 Map<String, IcuData> splitData = splitter.split(icuData, fallbackDir); 323 for (String dir : splitData.keySet()) { 324 IcuTextWriter.writeToFile(splitData.get(dir), outputDir + "/../" + dir); 325 } 326 } 327 } catch (IOException e) { 328 System.err.println("Error while converting " + icuData.getSourceFile()); 329 e.printStackTrace(); 330 } 331 } 332 333 /** 334 * Converts CLDR XML files using the specified mapper. 335 */ convert(Mapper mapper)336 private void convert(Mapper mapper) { 337 IcuData icuData; 338 Iterator<IcuData> iterator = mapper.iterator(filter); 339 final Type type = Type.valueOf(options.get("type").getValue()); 340 while (iterator.hasNext()) { 341 long time = System.currentTimeMillis(); 342 icuData = iterator.next(); 343 writeIcuData(icuData, destinationDir); 344 System.out.println("Converted " + type + ": " + icuData.getName() + ".xml in " + 345 (System.currentTimeMillis() - time) + "ms"); 346 } 347 } 348 349 /** 350 * Generates makefiles for files generated from the specified mapper. 351 * @param mapper 352 * @param makefileName 353 */ generateMakefile(Mapper mapper, String makefileName)354 private void generateMakefile(Mapper mapper, String makefileName) { 355 // Generate aliases and makefiles for main directory. 356 Set<String> aliases = writeSyntheticFiles(mapper.getGenerated(), destinationDir); 357 Makefile makefile = mapper.generateMakefile(aliases); 358 writeMakefile(makefile, destinationDir, makefileName); 359 if (splitter == null) return; 360 361 // Generate aliases and locales for remaining directories if a splitter was used. 362 for (String dir : splitter.getTargetDirs()) { 363 File outputDir = new File(destinationDir, "../" + dir); 364 aliases = writeSyntheticFiles(splitter.getDirSources(dir), outputDir.getAbsolutePath()); 365 makefile = splitter.generateMakefile(aliases, outputDir.getName()); 366 writeMakefile(makefile, outputDir.getAbsolutePath(), makefileName); 367 } 368 } 369 370 /** 371 * Creates all synthetic files needed by the makefile in the specified output directory. 372 * @param sources the set of source files that have already been generated 373 * @param outputDir 374 * @return 375 */ writeSyntheticFiles(Set<String> sources, String outputDir)376 private Set<String> writeSyntheticFiles(Set<String> sources, String outputDir) { 377 Set<String> targets = new HashSet<String>(); 378 if (aliasDeprecates != null) { 379 if (aliasDeprecates.emptyLocaleList != null) { 380 for (String locale : aliasDeprecates.emptyLocaleList) { 381 IcuData icuData = createEmptyFile(locale); 382 System.out.println("Empty locale created: " + locale); 383 targets.add(locale); 384 writeIcuData(icuData, outputDir); 385 } 386 } 387 if (aliasDeprecates.aliasList != null) { 388 for (Alias alias : aliasDeprecates.aliasList) { 389 try { 390 writeAlias(alias, outputDir, sources, targets); 391 } catch (IOException e) { 392 System.err.println("Error writing alias " + alias.from + "-" + alias.to); 393 System.exit(-1); 394 } 395 } 396 } 397 } 398 return targets; 399 } 400 401 /** 402 * Writes a makefile to the specified directory and filename. 403 */ writeMakefile(Makefile makefile, String outputDir, String makefileName)404 private void writeMakefile(Makefile makefile, String outputDir, String makefileName) { 405 try { 406 new File(outputDir + File.separator + makefileName).createNewFile(); 407 makefile.print(outputDir, makefileName); 408 } catch (IOException e) { 409 System.err.println("Error while writing makefile for " + outputDir + "/" + makefileName); 410 } 411 } 412 413 /** 414 * Creates an empty IcuData object to act as a placeholder for the specified alias target locale. 415 */ createEmptyFile(String locale)416 public IcuData createEmptyFile(String locale) { 417 IcuData icuData = new IcuData("icu-locale-deprecates.xml & build.xml", locale, true); 418 icuData.setFileComment("generated alias target"); 419 icuData.add("/___", ""); 420 return icuData; 421 } 422 423 /** 424 * Creates any synthetic files required for the specified alias. 425 * @param alias 426 * @param outputDir 427 * @param sources the set of sources in the output directory 428 * @param aliasTargets the alias targets already created in the output directory 429 * @throws IOException 430 */ writeAlias(Alias alias, String outputDir, Set<String> sources, Set<String> aliasTargets)431 private void writeAlias(Alias alias, String outputDir, 432 Set<String> sources, Set<String> aliasTargets) throws IOException { 433 String from = alias.from; 434 String to = alias.to; 435 // Add synthetic destination file for alias if necessary. 436 if (!sources.contains(to) && !aliasTargets.contains(to) && new File(outputDir + File.separator + alias.to + ".txt").createNewFile()) { 437 System.out.println(to + " not found, creating empty file in " + outputDir); 438 IcuTextWriter.writeToFile(createEmptyFile(alias.to), outputDir); 439 aliasTargets.add(to); 440 } 441 442 if (from == null || to == null) { 443 throw new IllegalArgumentException("Malformed alias - no 'from' or 'to': from=\"" + 444 from + "\" to=\"" + to + "\""); 445 } 446 447 if (sources.contains(from)) { 448 throw new IllegalArgumentException( 449 "Can't be both a synthetic alias locale and a real xml file - " 450 + "consider using <aliasLocale locale=\"" + from + "\"/> instead. "); 451 } 452 453 String rbPath = alias.rbPath; 454 String value = alias.value; 455 if ((rbPath == null) != (value == null)) { 456 throw new IllegalArgumentException("Incomplete alias specification for " + 457 from + "-" + to + ": both rbPath (" + 458 rbPath + ") and value (" + value + ") must be specified"); 459 } 460 461 IcuData icuData = new IcuData("icu-locale-deprecates.xml & build.xml", from, true); 462 if (rbPath == null) { 463 icuData.add(ALIAS_PATH, to); 464 } else { 465 icuData.add(rbPath, value); 466 } 467 468 if (new File(outputDir + File.separator + from + ".txt").createNewFile()) { 469 IcuTextWriter.writeToFile(icuData, outputDir); 470 aliasTargets.add(alias.from); 471 System.out.println("Created alias from " + from + " to " + to + " in " + outputDir + "."); 472 } 473 } 474 main(String[] args)475 public static void main(String[] args) throws IOException { 476 long totalTime = System.currentTimeMillis(); 477 NewLdml2IcuConverter converter = new NewLdml2IcuConverter(); 478 converter.processArgs(args); 479 System.out.println("Total time taken: " + (System.currentTimeMillis() - totalTime) + "ms"); 480 } 481 } 482