• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.icu;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.unicode.cldr.ant.CLDRConverterTool;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.dev.util.ElapsedTimer;
import com.ibm.icu.text.Transliterator;
26 /**
 * Utility to generate the Transliteration resource bundle files.
28  */
29 public class ConvertTransforms extends CLDRConverterTool {
31     private static final int HELP1 = 0,
32         HELP2 = 1,
33         SOURCEDIR = 2,
34         DESTDIR = 3,
35         MATCH = 4,
36         SKIP_COMMENTS = 5,
37         WRITE_INDEX = 6,
38         VERBOSE = 7,
39         APPROVED_ONLY = 8;
41     private static final UOption[] options = {
42         UOption.HELP_H(),
43         UOption.HELP_QUESTION_MARK(),
44         UOption.SOURCEDIR().setDefault(CLDRPaths.COMMON_DIRECTORY + "transforms/"),
45         UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "icu-transforms/"),
46         UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
47         UOption.create("commentSkip", 'c', UOption.NO_ARG),
48         UOption.create("writeIndex", 'x', UOption.NO_ARG),
49         UOption.VERBOSE(),
50         UOption.create("approvedOnly", 'a', UOption.NO_ARG),
51     };
53     static final String HELP_TEXT1 = "Use the following options" + XPathParts.NEWLINE
54         + "-h or -?\t for this message" + XPathParts.NEWLINE
55         + "-" + options[SOURCEDIR].shortName + "\t source directory. Default = -s"
56         + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY) + XPathParts.NEWLINE
57         + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" + XPathParts.NEWLINE
58         + "-" + options[DESTDIR].shortName + "\t destination directory. Default = -d"
59         + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/") + XPathParts.NEWLINE
60         + "-m<regex>\t to restrict the files to what matches <regex>" + XPathParts.NEWLINE
61         + "-a\t to only include transforms with approved/contributed status" + XPathParts.NEWLINE
62     // "--writeIndex / -x   to write the index (trnsfiles.mk)"+ XPathParts.NEWLINE
63     ;
65     // TODO add options to set input and output directories, matching pattern
main(String[] args)66     public static void main(String[] args) throws Exception {
67         ConvertTransforms ct = new ConvertTransforms();
68         ct.processArgs(args);
69     }
    // Set from the commentSkip option; when true, convertFile does not copy comment elements.
    private boolean skipComments;
    // Set from the writeIndex option; processArgs rejects it as not implemented.
    private boolean writeIndex = false;
    // Set from the verbose option; when true, addIndexInfo prints each ID it indexes.
    private boolean verbose = false;
    // Set from the approvedOnly option; when true, writeTransforms builds its Factory
    // with DraftStatus.contributed.
    private boolean approvedOnly = false;

    // Incremented by addIndexInfo once per transform file; reported by processArgs.
    int fileCount = 0;
writeTransforms(String inputDirectory, String matchingPattern, String outputDirectory)78     public void writeTransforms(String inputDirectory, String matchingPattern, String outputDirectory)
79         throws IOException {
80         System.out.println(new File(inputDirectory).getCanonicalPath());
81         Factory cldrFactory = (approvedOnly) ? Factory.make(inputDirectory, matchingPattern, DraftStatus.contributed)
82             : Factory.make(inputDirectory, matchingPattern);
83         Set<String> ids = cldrFactory.getAvailable();
84         PrintWriter index = FileUtilities.openUTF8Writer(outputDirectory, "root.txt");
85         doHeader(index, "//", "root.txt");
86         try {
87             index.println("root {");
88             index.println("    RuleBasedTransliteratorIDs {");
89             // addAlias(index, "Latin", "el", "", "Latin", "Greek", "UNGEGN");
90             // addAlias(index, "el", "Latin", "", "Greek", "Latin", "UNGEGN");
91             // addAlias(index, "Latin", "Jamo", "", "Latin", "ConjoiningJamo", "");
92             addAlias(index, "Tone", "Digit", "", "Pinyin", "NumericPinyin", "");
93             addAlias(index, "Digit", "Tone", "", "NumericPinyin", "Pinyin", "");
94             // addAlias(index, "Simplified", "Traditional", "", "Hans", "Hant", "");
95             // addAlias(index, "Traditional", "Simplified", "", "Hant", "Hans", "");
96             for (String id : ids) {
97                 if (id.equals("All")) continue;
98                 try {
99                     convertFile(cldrFactory, id, outputDirectory, index);
100                 } catch (IOException e) {
101                     System.err.println("Failure in: " + id);
102                     throw e;
103                 }
104             }
105             index.println("    }");
106             index.println("    TransliteratorNamePattern {");
107             index.println("        // Format for the display name of a Transliterator.");
108             index.println("        // This is the language-neutral form of this resource.");
109             index.println("        \"{0,choice,0#|1#{1}|2#{1}-{2}}\" // Display name");
110             index.println("    }");
111             index.println("    // Transliterator display names");
112             index.println("    // This is the English form of this resource.");
113             index.println("    \"%Translit%Hex\"         { \"%Translit%Hex\" }");
114             index.println("    \"%Translit%UnicodeName\" { \"%Translit%UnicodeName\" }");
115             index.println("    \"%Translit%UnicodeChar\" { \"%Translit%UnicodeChar\" }");
116             index.println("    TransliterateLATIN{        ");
117             index.println("    \"\",");
118             index.println("    \"\"");
119             index.println("    }");
120             index.println("}");
121         } finally {
122             index.close();
123         }
124     }
makePrintWriter(ByteArrayOutputStream bytes)126     public static PrintWriter makePrintWriter(ByteArrayOutputStream bytes) {
127         try {
128             OutputStreamWriter outStream = new OutputStreamWriter(bytes, "UTF-8");
129             BufferedWriter buff = new BufferedWriter(outStream, 4 * 1024);
130             PrintWriter p = new PrintWriter(buff);
132             return p;
133         } catch (Exception e) {
134             System.err.println("Error: Could not create OutputStreamWriter.");
135         }
136         return null;
137     }
showComments(PrintWriter toilet, String value)139     private void showComments(PrintWriter toilet, String value) {
140         String[] lines = value.trim().split("\\r\\n?|\\n");
141         for (String line : lines) {
142             if (!line.startsWith("#")) {
143                 line = "# " + line;
144             }
145             toilet.println(line);
146         }
147     }
convertFile(Factory cldrFactory, String id, String outputDirectory, PrintWriter index)149     private void convertFile(Factory cldrFactory, String id, String outputDirectory, PrintWriter index)
150         throws IOException {
151         PrintWriter output = null;
152         String filename = null;
153         CLDRFile cldrFile = cldrFactory.make(id, false);
154         boolean first = true;
155         for (Iterator<String> it = cldrFile.iterator("", cldrFile.getComparator()); it.hasNext();) {
156             String path = it.next();
157             if (path.indexOf("/version") >= 0 || path.indexOf("/generation") >= 0) {
158                 continue;
159             }
160             String value = cldrFile.getStringValue(path);
161             if (first) {
162                 String fullPath = cldrFile.getFullXPath(path);
163                 filename = addIndexInfo(index, fullPath);
164                 if (filename == null) {
165                     return; // not a transform file!
166                 }
167                 output = FileUtilities.openUTF8Writer(outputDirectory, filename);
168                 doHeader(output, "#", filename);
169                 first = false;
170             }
171             if (path.indexOf("/comment") >= 0) {
172                 if (!skipComments) {
173                     showComments(output, value);
174                 }
175             } else if (path.indexOf("/tRule") >= 0) {
176                 value = fixup.transliterate(value);
177                 value = value.replaceAll(CldrUtility.LINE_SEPARATOR, System.lineSeparator());
178                 output.println(value);
179             } else {
180                 throw new IllegalArgumentException("Unknown element: " + path + "\t " + value);
181             }
182         }
183         if (output != null) { // null for transforms whose draft status is too low
184             output.close();
185         }
186     }
    // Transliterator that convertFile applies to every tRule value before writing it.
    // NOTE(review): the ID looks like it rewrites nonspacing marks ([:Mn:]) as Java-style
    // hex escapes; confirm against the ICU Transliterator documentation.
    public static final Transliterator fixup = Transliterator.getInstance("[:Mn:]any-hex/java");
replaceUnquoted(String value, String toReplace, String replacement)190     public static String replaceUnquoted(String value, String toReplace, String replacement) {
191         // quick exit in most cases
192         if (value.indexOf(toReplace) < 0)
193             return value;
195         String updatedValue = "";
196         int segmentStart = 0;
197         boolean inQuotes = false;
198         boolean ignoreCharValue = false;
199         int length = value.length();
201         for (int pos = 0; pos < length; ++pos) {
202             char curChar = (char) 0;
204             if (ignoreCharValue) {
205                 ignoreCharValue = false;
206             } else {
207                 curChar = value.charAt(pos);
208             }
210             if (curChar == '\\') {
211                 // escape, ignore the value of the next char (actually the next UTF16 code unit, but that works here)
212                 ignoreCharValue = true;
213             }
214             boolean isLastChar = (pos + 1 >= length);
215             if (curChar == '\'' || isLastChar) {
216                 // quote, begin or end of a quoted literal (in which no replacement takes place)
217                 if (inQuotes) {
218                     // End of a quoted segment; guaranteed to include at least opening quote.
219                     // Just add the segment (including current char) to updatedValue.
220                     updatedValue = updatedValue + value.substring(segmentStart, pos + 1);
221                     segmentStart = pos + 1;
222                 } else {
223                     if (isLastChar)
224                         ++pos;
225                     if (pos > segmentStart) {
226                         // End of a nonempty unquoted segment; perform requested replacements and
227                         // then add segment to updatedValue.
228                         String currentSegment = value.substring(segmentStart, pos);
229                         updatedValue = updatedValue + currentSegment.replace(toReplace, replacement);
230                         segmentStart = pos;
231                     }
232                 }
233                 inQuotes = !inQuotes;
234             }
235             // else the char just becomes part of the current segment
236         }
237         return updatedValue;
238     }
addIndexInfo(PrintWriter index, String path)240     private String addIndexInfo(PrintWriter index, String path) {
241         XPathParts parts = XPathParts.getFrozenInstance(path);
242         Map<String, String> attributes = parts.findAttributes("transform");
243         if (attributes == null) return null; // error, not a transform file
244         String source = attributes.get("source");
245         String target = attributes.get("target");
246         String variant = attributes.get("variant");
247         String direction = attributes.get("direction");
248         String alias = attributes.get("alias");
249         String backwardAlias = attributes.get("backwardAlias");
250         String visibility = attributes.get("visibility");
252         String status = "internal".equals(visibility) ? "internal" : "file";
254         fileCount++;
256         String id = source + "-" + target;
257         String rid = target + "-" + source;
258         String filename = source + "_" + target;
259         if (variant != null) {
260             id += "/" + variant;
261             rid += "/" + variant;
262             filename += "_" + variant;
263         }
264         filename += ".txt";
266         if (direction.equals("both") || direction.equals("forward")) {
267             if (verbose) {
268                 System.out.println("    " + id + "    " + filename + "    " + "FORWARD");
269             }
270             if (alias != null) {
271                 for (String ali : alias.trim().split("\\s+")) {
272                     addAlias(index, ali, id);
273                 }
274             }
275             index.println("        " + id + " {");
276             index.println("            " + status + " {");
277             index.println("                resource:process(transliterator) {\"" + filename + "\"}");
278             index.println("                direction {\"FORWARD\"}");
279             index.println("            }");
280             index.println("        }");
281         }
282         if (direction.equals("both") || direction.equals("backward")) {
283             if (verbose) {
284                 System.out.println("    " + rid + "    " + filename + "    " + "REVERSE");
285             }
286             if (backwardAlias != null) {
287                 for (String bali : backwardAlias.trim().split("\\s+")) {
288                     addAlias(index, bali, rid);
289                 }
290             }
291             index.println("        " + rid + " {");
292             index.println("            " + status + " {");
293             index.println("                resource:process(transliterator) {\"" + filename + "\"}");
294             index.println("                direction {\"REVERSE\"}");
295             index.println("            }");
296             index.println("        }");
297         }
298         index.println();
299         return filename;
300     }
addAlias(PrintWriter index, String aliasSource, String aliasTarget, String aliasVariant, String originalSource, String originalTarget, String originalVariant)302     void addAlias(PrintWriter index, String aliasSource, String aliasTarget, String aliasVariant,
303         String originalSource, String originalTarget, String originalVariant) {
304         // Spacedhan-Han {
305         // alias {"null"}
306         // }
307         addAlias(index, getName(aliasSource, aliasTarget, aliasVariant),
308             getName(originalSource, originalTarget, originalVariant));
309     }
addAlias(PrintWriter index, String alias, String original)311     private void addAlias(PrintWriter index, String alias, String original) {
312         index.println("        " + alias + " {");
313         index.println("            alias" + " {\"" + original + "\"}");
314         index.println("        }");
315     }
getName(String source, String target, String variant)317     String getName(String source, String target, String variant) {
318         String id = source + "-" + target;
319         if (variant != null && variant.length() != 0) {
320             id += "/" + variant;
321         }
322         return id;
323     }
doHeader(PrintWriter output, String quoteSymbol, String filename)325     private void doHeader(PrintWriter output, String quoteSymbol, String filename) {
326         output.print('\uFEFF');
327         output.println(quoteSymbol + " © 2016 and later: Unicode, Inc. and others.");
328         output.println(quoteSymbol + " License & terms of use: http://www.unicode.org/copyright.html#License");
329         output.println(quoteSymbol);
330         output.println(quoteSymbol + " File: " + filename);
331         output.println(quoteSymbol + " Generated from CLDR");
332         output.println(quoteSymbol);
333     }
processArgs(String[] args)335     public void processArgs(String[] args) {
336         UOption.parseArgs(args, options);
337         if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
338             System.out.println(HELP_TEXT1);
339             return;
340         }
342         String sourceDir = options[SOURCEDIR].value; // Utility.COMMON_DIRECTORY + "transforms/";
343         String targetDir = options[DESTDIR].value; // Utility.GEN_DIRECTORY + "main/";
344         String match = options[MATCH].value;
345         skipComments = options[SKIP_COMMENTS].doesOccur;
346         writeIndex = options[WRITE_INDEX].doesOccur;
347         verbose = options[VERBOSE].doesOccur;
348         approvedOnly = options[APPROVED_ONLY].doesOccur;
350         try {
351             if (writeIndex) {
352                 throw new InternalError("writeIndex not implemented.");
353             } else {
354                 ElapsedTimer et = new ElapsedTimer();
355                 writeTransforms(sourceDir, match, targetDir + File.separator);
356                 System.out.println("ConvertTransforms: wrote " + fileCount +
357                     " files in " + et);
358             }
359         } catch (IOException ex) {
360             RuntimeException e = new RuntimeException();
361             e.initCause(ex.getCause());
362             throw e;
363         } finally {
364             System.out.println("DONE");
365         }
366     }
369     /*
370      * private void fixData(String inputDirectory, String matchingPattern, String outputDirectory) throws IOException {
371      * File dir = new File(inputDirectory);
372      * File[] files = dir.listFiles();
373      * for (int i = 0; i < files.length; ++i) {
374      * if (files[i].isDirectory()) continue;
375      * BufferedReader input = FileUtilities.openUTF8Reader("", files[i].getCanonicalPath());
376      * PrintWriter output = FileUtilities.openUTF8Writer("", outputDirectory + files[i].getName());
377      * while (true) {
378      * String line = input.readLine();
379      * if (line == null) break;
380      * if (line.indexOf("DOCTYPE") >= 0) {
381      * line = line.replaceAll(" ldml ", " supplementalData ");
382      * }
383      * output.println(line);
384      * }
385      * input.close();
386      * output.close();
387      * }
388      * }
389      */
391 }