package org.unicode.cldr.icu;

import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.unicode.cldr.ant.CLDRConverterTool;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.dev.util.ElapsedTimer;
import com.ibm.icu.text.Transliterator;

/**
 * Utility to generate the Transliteration resource bundle files.
 *
 * <p>For every transform file found in the source directory this tool writes one rule file
 * ({@code <source>_<target>[_<variant>].txt}) into the destination directory, and it writes a
 * {@code root.txt} index that lists every generated transform ID and its aliases.
 */
public class ConvertTransforms extends CLDRConverterTool {

    // Indexes into the options array below; the order must match the array initializer.
    private static final int HELP1 = 0,
        HELP2 = 1,
        SOURCEDIR = 2,
        DESTDIR = 3,
        MATCH = 4,
        SKIP_COMMENTS = 5,
        WRITE_INDEX = 6,
        VERBOSE = 7,
        APPROVED_ONLY = 8;

    // Shared by the option table and the help text so the documented defaults cannot drift
    // away from the defaults that are actually applied.
    private static final String DEFAULT_SOURCE_DIR = CLDRPaths.COMMON_DIRECTORY + "transforms/";
    private static final String DEFAULT_DEST_DIR = CLDRPaths.GEN_DIRECTORY + "icu-transforms/";

    private static final UOption[] options = {
        UOption.HELP_H(),
        UOption.HELP_QUESTION_MARK(),
        UOption.SOURCEDIR().setDefault(DEFAULT_SOURCE_DIR),
        UOption.DESTDIR().setDefault(DEFAULT_DEST_DIR),
        UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
        UOption.create("commentSkip", 'c', UOption.NO_ARG),
        UOption.create("writeIndex", 'x', UOption.NO_ARG),
        UOption.VERBOSE(),
        UOption.create("approvedOnly", 'a', UOption.NO_ARG),
    };

    static final String HELP_TEXT1 = "Use the following options" + XPathParts.NEWLINE
        + "-h or -?\t for this message" + XPathParts.NEWLINE
        + "-" + options[SOURCEDIR].shortName + "\t source directory. Default = -s"
        + CldrUtility.getCanonicalName(DEFAULT_SOURCE_DIR) + XPathParts.NEWLINE
        + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" + XPathParts.NEWLINE
        + "-" + options[DESTDIR].shortName + "\t destination directory. Default = -d"
        + CldrUtility.getCanonicalName(DEFAULT_DEST_DIR) + XPathParts.NEWLINE
        + "-m<regex>\t to restrict the files to what matches <regex>" + XPathParts.NEWLINE
        + "-a\t to only include transforms with approved/contributed status" + XPathParts.NEWLINE
        + "-" + options[SKIP_COMMENTS].shortName + "\t to omit comments from the generated rule files"
        + XPathParts.NEWLINE
        + "-" + options[VERBOSE].shortName + "\t to print each transform ID as it is added to the index"
        + XPathParts.NEWLINE
    // "--writeIndex / -x to write the index (trnsfiles.mk)"+ XPathParts.NEWLINE
    ;

    /**
     * Command-line entry point; delegates to {@link #processArgs(String[])}.
     *
     * @param args command-line options, see {@link #HELP_TEXT1}
     */
    public static void main(String[] args) throws Exception {
        ConvertTransforms ct = new ConvertTransforms();
        ct.processArgs(args);
    }

    private boolean skipComments;
    private boolean writeIndex = false;
    private boolean verbose = false;
    private boolean approvedOnly = false;

    /** Incremented once per transform that is added to the index. */
    int fileCount = 0;

    /**
     * Converts every matching transform in {@code inputDirectory} and writes the
     * {@code root.txt} index into {@code outputDirectory}.
     *
     * @param inputDirectory directory containing the transform source files
     * @param matchingPattern regular expression selecting which files to convert
     * @param outputDirectory directory that receives root.txt and the rule files
     * @throws IOException if a file cannot be written; the ID being converted is reported on
     *             standard error before the exception is rethrown
     */
    public void writeTransforms(String inputDirectory, String matchingPattern, String outputDirectory)
        throws IOException {
        System.out.println(new File(inputDirectory).getCanonicalPath());
        Factory cldrFactory = approvedOnly
            ? Factory.make(inputDirectory, matchingPattern, DraftStatus.contributed)
            : Factory.make(inputDirectory, matchingPattern);
        Set<String> ids = cldrFactory.getAvailable();
        // try-with-resources: the index is closed even when writing the header or a file fails.
        try (PrintWriter index = FileUtilities.openUTF8Writer(outputDirectory, "root.txt")) {
            doHeader(index, "//", "root.txt");
            index.println("root {");
            index.println(" RuleBasedTransliteratorIDs {");
            // addAlias(index, "Latin", "el", "", "Latin", "Greek", "UNGEGN");
            // addAlias(index, "el", "Latin", "", "Greek", "Latin", "UNGEGN");
            // addAlias(index, "Latin", "Jamo", "", "Latin", "ConjoiningJamo", "");
            addAlias(index, "Tone", "Digit", "", "Pinyin", "NumericPinyin", "");
            addAlias(index, "Digit", "Tone", "", "NumericPinyin", "Pinyin", "");
            // addAlias(index, "Simplified", "Traditional", "", "Hans", "Hant", "");
            // addAlias(index, "Traditional", "Simplified", "", "Hant", "Hans", "");
            for (String id : ids) {
                if (id.equals("All")) {
                    continue;
                }
                try {
                    convertFile(cldrFactory, id, outputDirectory, index);
                } catch (IOException e) {
                    System.err.println("Failure in: " + id);
                    throw e;
                }
            }
            index.println(" }");
            index.println(" TransliteratorNamePattern {");
            index.println(" // Format for the display name of a Transliterator.");
            index.println(" // This is the language-neutral form of this resource.");
            index.println(" \"{0,choice,0#|1#{1}|2#{1}-{2}}\" // Display name");
            index.println(" }");
            index.println(" // Transliterator display names");
            index.println(" // This is the English form of this resource.");
            index.println(" \"%Translit%Hex\" { \"%Translit%Hex\" }");
            index.println(" \"%Translit%UnicodeName\" { \"%Translit%UnicodeName\" }");
            index.println(" \"%Translit%UnicodeChar\" { \"%Translit%UnicodeChar\" }");
            index.println(" TransliterateLATIN{ ");
            index.println(" \"\",");
            index.println(" \"\"");
            index.println(" }");
            index.println("}");
        }
    }

    /**
     * Wraps a byte buffer in a buffered, UTF-8 encoding {@link PrintWriter}.
     *
     * @param bytes the in-memory stream that receives the encoded output
     * @return a writer over {@code bytes}; never {@code null}
     * @throws NullPointerException if {@code bytes} is {@code null}
     */
    public static PrintWriter makePrintWriter(ByteArrayOutputStream bytes) {
        // The Charset overload cannot fail on charset lookup, so nothing needs to be swallowed.
        OutputStreamWriter outStream = new OutputStreamWriter(bytes, StandardCharsets.UTF_8);
        BufferedWriter buff = new BufferedWriter(outStream, 4 * 1024);
        return new PrintWriter(buff);
    }

    /**
     * Writes a comment value line by line, prefixing "# " to lines that do not already start
     * with "#".
     */
    private void showComments(PrintWriter out, String value) {
        String[] lines = value.trim().split("\\r\\n?|\\n");
        for (String line : lines) {
            if (!line.startsWith("#")) {
                line = "# " + line;
            }
            out.println(line);
        }
    }

    /**
     * Converts one transform: registers it in the index and writes its rule file.
     *
     * <p>The output file is opened lazily on the first content path, because the file name is
     * derived from that path's attributes. Nothing is written when the file holds no transform
     * element.
     *
     * @throws IOException if the rule file cannot be opened
     * @throws IllegalArgumentException if the file contains an element other than a comment or
     *             a rule
     */
    private void convertFile(Factory cldrFactory, String id, String outputDirectory, PrintWriter index)
        throws IOException {
        PrintWriter output = null;
        CLDRFile cldrFile = cldrFactory.make(id, false);
        try {
            for (Iterator<String> it = cldrFile.iterator("", cldrFile.getComparator()); it.hasNext();) {
                String path = it.next();
                if (path.indexOf("/version") >= 0 || path.indexOf("/generation") >= 0) {
                    continue;
                }
                String value = cldrFile.getStringValue(path);
                if (output == null) {
                    // First content path: derive the file name and open the output.
                    String fullPath = cldrFile.getFullXPath(path);
                    String filename = addIndexInfo(index, fullPath, id);
                    if (filename == null) {
                        return; // not a transform file!
                    }
                    output = FileUtilities.openUTF8Writer(outputDirectory, filename);
                    doHeader(output, "#", filename);
                }
                if (path.indexOf("/comment") >= 0) {
                    if (!skipComments) {
                        showComments(output, value);
                    }
                } else if (path.indexOf("/tRule") >= 0) {
                    value = fixup.transliterate(value);
                    value = value.replaceAll(CldrUtility.LINE_SEPARATOR, System.lineSeparator());
                    output.println(value);
                } else {
                    throw new IllegalArgumentException("Unknown element: " + path + "\t " + value);
                }
            }
        } finally {
            // Still null when no content path was found or the file is not a transform;
            // closing here also covers the exception paths above.
            if (output != null) {
                output.close();
            }
        }
    }

    /** Transliterator applied to every rule value before it is written out. */
    public static final Transliterator fixup = Transliterator.getInstance("[:Mn:]any-hex/java");

    /**
     * Replaces every occurrence of {@code toReplace} that lies outside single-quoted literals.
     *
     * <p>A backslash escapes the following UTF-16 code unit, so an escaped quote neither opens
     * nor closes a literal. Quoted segments are copied through unchanged.
     *
     * @param value the text to process
     * @param toReplace the substring to look for in unquoted segments
     * @param replacement the text substituted for each unquoted occurrence
     * @return the processed text, or {@code value} itself when it does not contain
     *         {@code toReplace} at all
     */
    public static String replaceUnquoted(String value, String toReplace, String replacement) {
        // quick exit in most cases
        if (value.indexOf(toReplace) < 0) {
            return value;
        }

        int length = value.length();
        StringBuilder updatedValue = new StringBuilder(length);
        int segmentStart = 0;
        boolean inQuotes = false;
        boolean ignoreCharValue = false;

        for (int pos = 0; pos < length; ++pos) {
            char curChar = (char) 0;

            if (ignoreCharValue) {
                ignoreCharValue = false;
            } else {
                curChar = value.charAt(pos);
            }

            if (curChar == '\\') {
                // escape, ignore the value of the next char (actually the next UTF16 code unit,
                // but that works here)
                ignoreCharValue = true;
            }
            boolean isLastChar = (pos + 1 >= length);
            if (curChar == '\'' || isLastChar) {
                // quote, begin or end of a quoted literal (in which no replacement takes place)
                if (inQuotes) {
                    // End of a quoted segment; guaranteed to include at least opening quote.
                    // Just add the segment (including current char) to updatedValue.
                    updatedValue.append(value, segmentStart, pos + 1);
                    segmentStart = pos + 1;
                } else {
                    if (isLastChar) {
                        ++pos; // include the final char in the unquoted segment
                    }
                    if (pos > segmentStart) {
                        // End of a nonempty unquoted segment; perform requested replacements
                        // and then add segment to updatedValue.
                        String currentSegment = value.substring(segmentStart, pos);
                        updatedValue.append(currentSegment.replace(toReplace, replacement));
                        segmentStart = pos;
                    }
                }
                inQuotes = !inQuotes;
            }
            // else the char just becomes part of the current segment
        }
        return updatedValue.toString();
    }

    // Shared, mutable parser instance that addIndexInfo reuses on every call.
    static XPathParts parts = new XPathParts();

    /**
     * Writes the index entries (and any aliases) for one transform and returns the name of the
     * rule file that should hold its rules.
     *
     * @param index the root.txt writer
     * @param path full XPath whose {@code transform} element carries the attributes
     * @param transID the CLDR file ID (not used when building the entries)
     * @return the rule file name, or {@code null} when the path has no {@code transform}
     *         element
     */
    private String addIndexInfo(PrintWriter index, String path, String transID) {
        parts.set(path);
        Map<String, String> attributes = parts.findAttributes("transform");
        if (attributes == null) {
            return null; // error, not a transform file
        }
        String source = attributes.get("source");
        String target = attributes.get("target");
        String variant = attributes.get("variant");
        String direction = attributes.get("direction");
        String alias = attributes.get("alias");
        String backwardAlias = attributes.get("backwardAlias");
        String visibility = attributes.get("visibility");

        String status = "internal".equals(visibility) ? "internal" : "file";

        fileCount++;

        String id = source + "-" + target;
        String rid = target + "-" + source;
        String filename = source + "_" + target;
        if (variant != null) {
            id += "/" + variant;
            rid += "/" + variant;
            filename += "_" + variant;
        }
        filename += ".txt";

        if (direction.equals("both") || direction.equals("forward")) {
            addIndexEntry(index, id, alias, status, filename, "FORWARD");
        }
        if (direction.equals("both") || direction.equals("backward")) {
            addIndexEntry(index, rid, backwardAlias, status, filename, "REVERSE");
        }
        index.println();
        return filename;
    }

    /**
     * Emits the aliases and the resource entry for one direction of a transform.
     *
     * @param aliases whitespace-separated alias IDs, or {@code null} for none
     * @param direction the direction keyword written to the index ("FORWARD" or "REVERSE")
     */
    private void addIndexEntry(PrintWriter index, String id, String aliases, String status,
        String filename, String direction) {
        if (verbose) {
            System.out.println(" " + id + " " + filename + " " + direction);
        }
        if (aliases != null) {
            for (String ali : aliases.trim().split("\\s+")) {
                addAlias(index, ali, id);
            }
        }
        index.println(" " + id + " {");
        index.println(" " + status + " {");
        index.println(" resource:process(transliterator) {\"" + filename + "\"}");
        index.println(" direction {\"" + direction + "\"}");
        index.println(" }");
        index.println(" }");
    }

    /**
     * Writes an alias entry whose alias and original IDs are each built from
     * source/target/variant parts via {@link #getName(String, String, String)}.
     */
    void addAlias(PrintWriter index, String aliasSource, String aliasTarget, String aliasVariant,
        String originalSource, String originalTarget, String originalVariant) {
        // Spacedhan-Han {
        // alias {"null"}
        // }
        addAlias(index, getName(aliasSource, aliasTarget, aliasVariant),
            getName(originalSource, originalTarget, originalVariant));
    }

    /** Writes an index entry that maps {@code alias} to the transform ID {@code original}. */
    private void addAlias(PrintWriter index, String alias, String original) {
        index.println(" " + alias + " {");
        index.println(" alias" + " {\"" + original + "\"}");
        index.println(" }");
    }

    /**
     * Builds a transform ID of the form {@code source-target[/variant]}; the variant is
     * omitted when it is {@code null} or empty.
     */
    String getName(String source, String target, String variant) {
        String id = source + "-" + target;
        if (variant != null && variant.length() != 0) {
            id += "/" + variant;
        }
        return id;
    }

    /**
     * Writes the byte-order mark and the copyright/provenance header.
     *
     * @param quoteSymbol the comment prefix for the target format ("//" or "#")
     * @param filename the file name recorded in the header
     */
    private void doHeader(PrintWriter output, String quoteSymbol, String filename) {
        output.print('\uFEFF');
        output.println(quoteSymbol + " © 2016 and later: Unicode, Inc. and others.");
        output.println(quoteSymbol + " License & terms of use: http://www.unicode.org/copyright.html#License");
        output.println(quoteSymbol);
        output.println(quoteSymbol + " File: " + filename);
        output.println(quoteSymbol + " Generated from CLDR");
        output.println(quoteSymbol);
    }

    /**
     * Parses the command line and runs the conversion.
     *
     * <p>Prints the help text and returns when a help option is present. "DONE" is printed
     * after every conversion attempt, whether it succeeded or failed.
     *
     * @param args command-line options, see {@link #HELP_TEXT1}
     * @throws RuntimeException wrapping the {@link IOException} when a file cannot be written
     * @throws InternalError if the unimplemented writeIndex option is requested
     */
    public void processArgs(String[] args) {
        UOption.parseArgs(args, options);
        if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
            System.out.println(HELP_TEXT1);
            return;
        }

        String sourceDir = options[SOURCEDIR].value;
        String targetDir = options[DESTDIR].value;
        String match = options[MATCH].value;
        skipComments = options[SKIP_COMMENTS].doesOccur;
        writeIndex = options[WRITE_INDEX].doesOccur;
        verbose = options[VERBOSE].doesOccur;
        approvedOnly = options[APPROVED_ONLY].doesOccur;

        try {
            if (writeIndex) {
                throw new InternalError("writeIndex not implemented.");
            } else {
                ElapsedTimer et = new ElapsedTimer();
                writeTransforms(sourceDir, match, targetDir + File.separator);
                System.out.println("ConvertTransforms: wrote " + fileCount +
                    " files in " + et);
            }
        } catch (IOException ex) {
            // Chain the IOException itself so its message and stack trace are preserved.
            throw new RuntimeException(ex);
        } finally {
            System.out.println("DONE");
        }
    }

    // fixData ONLY NEEDED TO FIX FILE PROBLEM
    /*
     * private void fixData(String inputDirectory, String matchingPattern, String outputDirectory) throws IOException {
     * File dir = new File(inputDirectory);
     * File[] files = dir.listFiles();
     * for (int i = 0; i < files.length; ++i) {
     * if (files[i].isDirectory()) continue;
     * BufferedReader input = FileUtilities.openUTF8Reader("", files[i].getCanonicalPath());
     * PrintWriter output = FileUtilities.openUTF8Writer("", outputDirectory + files[i].getName());
     * while (true) {
     * String line = input.readLine();
     * if (line == null) break;
     * if (line.indexOf("DOCTYPE") >= 0) {
     * line = line.replaceAll(" ldml ", " supplementalData ");
     * }
     * output.println(line);
     * }
     * input.close();
     * output.close();
     * }
     * }
     */

}