• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.icu;
2 
3 import java.io.BufferedWriter;
4 import java.io.ByteArrayOutputStream;
5 import java.io.File;
6 import java.io.IOException;
7 import java.io.OutputStreamWriter;
8 import java.io.PrintWriter;
9 import java.util.Iterator;
10 import java.util.Map;
11 import java.util.Set;
12 
13 import org.unicode.cldr.ant.CLDRConverterTool;
14 import org.unicode.cldr.draft.FileUtilities;
15 import org.unicode.cldr.util.CLDRFile;
16 import org.unicode.cldr.util.CLDRFile.DraftStatus;
17 import org.unicode.cldr.util.CLDRPaths;
18 import org.unicode.cldr.util.CldrUtility;
19 import org.unicode.cldr.util.Factory;
20 import org.unicode.cldr.util.XPathParts;
21 
22 import com.ibm.icu.dev.tool.UOption;
23 import com.ibm.icu.dev.util.ElapsedTimer;
24 import com.ibm.icu.text.Transliterator;
25 
26 /**
27  * Utility to generate the Transliteration resource bundle files.
28  */
29 public class ConvertTransforms extends CLDRConverterTool {
30 
31     private static final int HELP1 = 0,
32         HELP2 = 1,
33         SOURCEDIR = 2,
34         DESTDIR = 3,
35         MATCH = 4,
36         SKIP_COMMENTS = 5,
37         WRITE_INDEX = 6,
38         VERBOSE = 7,
39         APPROVED_ONLY = 8;
40 
41     private static final UOption[] options = {
42         UOption.HELP_H(),
43         UOption.HELP_QUESTION_MARK(),
44         UOption.SOURCEDIR().setDefault(CLDRPaths.COMMON_DIRECTORY + "transforms/"),
45         UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "icu-transforms/"),
46         UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
47         UOption.create("commentSkip", 'c', UOption.NO_ARG),
48         UOption.create("writeIndex", 'x', UOption.NO_ARG),
49         UOption.VERBOSE(),
50         UOption.create("approvedOnly", 'a', UOption.NO_ARG),
51     };
52 
53     static final String HELP_TEXT1 = "Use the following options" + XPathParts.NEWLINE
54         + "-h or -?\t for this message" + XPathParts.NEWLINE
55         + "-" + options[SOURCEDIR].shortName + "\t source directory. Default = -s"
56         + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY) + XPathParts.NEWLINE
57         + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" + XPathParts.NEWLINE
58         + "-" + options[DESTDIR].shortName + "\t destination directory. Default = -d"
59         + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/") + XPathParts.NEWLINE
60         + "-m<regex>\t to restrict the files to what matches <regex>" + XPathParts.NEWLINE
61         + "-a\t to only include transforms with approved/contributed status" + XPathParts.NEWLINE
62     // "--writeIndex / -x   to write the index (trnsfiles.mk)"+ XPathParts.NEWLINE
63     ;
64 
65     // TODO add options to set input and output directories, matching pattern
main(String[] args)66     public static void main(String[] args) throws Exception {
67         ConvertTransforms ct = new ConvertTransforms();
68         ct.processArgs(args);
69     }
70 
71     private boolean skipComments;
72     private boolean writeIndex = false;
73     private boolean verbose = false;
74     private boolean approvedOnly = false;
75 
76     int fileCount = 0;
77 
writeTransforms(String inputDirectory, String matchingPattern, String outputDirectory)78     public void writeTransforms(String inputDirectory, String matchingPattern, String outputDirectory)
79         throws IOException {
80         System.out.println(new File(inputDirectory).getCanonicalPath());
81         Factory cldrFactory = (approvedOnly) ? Factory.make(inputDirectory, matchingPattern, DraftStatus.contributed)
82             : Factory.make(inputDirectory, matchingPattern);
83         Set<String> ids = cldrFactory.getAvailable();
84         PrintWriter index = FileUtilities.openUTF8Writer(outputDirectory, "root.txt");
85         doHeader(index, "//", "root.txt");
86         try {
87             index.println("root {");
88             index.println("    RuleBasedTransliteratorIDs {");
89             // addAlias(index, "Latin", "el", "", "Latin", "Greek", "UNGEGN");
90             // addAlias(index, "el", "Latin", "", "Greek", "Latin", "UNGEGN");
91             // addAlias(index, "Latin", "Jamo", "", "Latin", "ConjoiningJamo", "");
92             addAlias(index, "Tone", "Digit", "", "Pinyin", "NumericPinyin", "");
93             addAlias(index, "Digit", "Tone", "", "NumericPinyin", "Pinyin", "");
94             // addAlias(index, "Simplified", "Traditional", "", "Hans", "Hant", "");
95             // addAlias(index, "Traditional", "Simplified", "", "Hant", "Hans", "");
96             for (String id : ids) {
97                 if (id.equals("All")) continue;
98                 try {
99                     convertFile(cldrFactory, id, outputDirectory, index);
100                 } catch (IOException e) {
101                     System.err.println("Failure in: " + id);
102                     throw e;
103                 }
104             }
105             index.println("    }");
106             index.println("    TransliteratorNamePattern {");
107             index.println("        // Format for the display name of a Transliterator.");
108             index.println("        // This is the language-neutral form of this resource.");
109             index.println("        \"{0,choice,0#|1#{1}|2#{1}-{2}}\" // Display name");
110             index.println("    }");
111             index.println("    // Transliterator display names");
112             index.println("    // This is the English form of this resource.");
113             index.println("    \"%Translit%Hex\"         { \"%Translit%Hex\" }");
114             index.println("    \"%Translit%UnicodeName\" { \"%Translit%UnicodeName\" }");
115             index.println("    \"%Translit%UnicodeChar\" { \"%Translit%UnicodeChar\" }");
116             index.println("    TransliterateLATIN{        ");
117             index.println("    \"\",");
118             index.println("    \"\"");
119             index.println("    }");
120             index.println("}");
121         } finally {
122             index.close();
123         }
124     }
125 
makePrintWriter(ByteArrayOutputStream bytes)126     public static PrintWriter makePrintWriter(ByteArrayOutputStream bytes) {
127         try {
128             OutputStreamWriter outStream = new OutputStreamWriter(bytes, "UTF-8");
129             BufferedWriter buff = new BufferedWriter(outStream, 4 * 1024);
130             PrintWriter p = new PrintWriter(buff);
131 
132             return p;
133         } catch (Exception e) {
134             System.err.println("Error: Could not create OutputStreamWriter.");
135         }
136         return null;
137     }
138 
showComments(PrintWriter toilet, String value)139     private void showComments(PrintWriter toilet, String value) {
140         String[] lines = value.trim().split("\\r\\n?|\\n");
141         for (String line : lines) {
142             if (!line.startsWith("#")) {
143                 line = "# " + line;
144             }
145             toilet.println(line);
146         }
147     }
148 
convertFile(Factory cldrFactory, String id, String outputDirectory, PrintWriter index)149     private void convertFile(Factory cldrFactory, String id, String outputDirectory, PrintWriter index)
150         throws IOException {
151         PrintWriter output = null;
152         String filename = null;
153         CLDRFile cldrFile = cldrFactory.make(id, false);
154         boolean first = true;
155         for (Iterator<String> it = cldrFile.iterator("", cldrFile.getComparator()); it.hasNext();) {
156             String path = it.next();
157             if (path.indexOf("/version") >= 0 || path.indexOf("/generation") >= 0) {
158                 continue;
159             }
160             String value = cldrFile.getStringValue(path);
161             if (first) {
162                 String fullPath = cldrFile.getFullXPath(path);
163                 filename = addIndexInfo(index, fullPath, id);
164                 if (filename == null) return; // not a transform file!
165                 output = FileUtilities.openUTF8Writer(outputDirectory, filename);
166                 doHeader(output, "#", filename);
167                 first = false;
168             }
169             if (path.indexOf("/comment") >= 0) {
170                 if (!skipComments) {
171                     showComments(output, value);
172                 }
173             } else if (path.indexOf("/tRule") >= 0) {
174                 value = fixup.transliterate(value);
175                 value = value.replaceAll(CldrUtility.LINE_SEPARATOR, System.lineSeparator());
176                 output.println(value);
177             } else {
178                 throw new IllegalArgumentException("Unknown element: " + path + "\t " + value);
179             }
180         }
181         if (output != null) { // null for transforms whose draft status is too low
182             output.close();
183         }
184     }
185 
    /**
     * Transliterator that convertFile applies to every tRule value before writing it out.
     * NOTE(review): the ID "[:Mn:]any-hex/java" presumably restricts Any-Hex/Java escaping to
     * nonspacing marks; confirm against the ICU transliterator ID syntax.
     */
    public static final Transliterator fixup = Transliterator.getInstance("[:Mn:]any-hex/java");
187 
replaceUnquoted(String value, String toReplace, String replacement)188     public static String replaceUnquoted(String value, String toReplace, String replacement) {
189         // quick exit in most cases
190         if (value.indexOf(toReplace) < 0)
191             return value;
192 
193         String updatedValue = "";
194         int segmentStart = 0;
195         boolean inQuotes = false;
196         boolean ignoreCharValue = false;
197         int length = value.length();
198 
199         for (int pos = 0; pos < length; ++pos) {
200             char curChar = (char) 0;
201 
202             if (ignoreCharValue) {
203                 ignoreCharValue = false;
204             } else {
205                 curChar = value.charAt(pos);
206             }
207 
208             if (curChar == '\\') {
209                 // escape, ignore the value of the next char (actually the next UTF16 code unit, but that works here)
210                 ignoreCharValue = true;
211             }
212             boolean isLastChar = (pos + 1 >= length);
213             if (curChar == '\'' || isLastChar) {
214                 // quote, begin or end of a quoted literal (in which no replacement takes place)
215                 if (inQuotes) {
216                     // End of a quoted segment; guaranteed to include at least opening quote.
217                     // Just add the segment (including current char) to updatedValue.
218                     updatedValue = updatedValue + value.substring(segmentStart, pos + 1);
219                     segmentStart = pos + 1;
220                 } else {
221                     if (isLastChar)
222                         ++pos;
223                     if (pos > segmentStart) {
224                         // End of a nonempty unquoted segment; perform requested replacements and
225                         // then add segment to updatedValue.
226                         String currentSegment = value.substring(segmentStart, pos);
227                         updatedValue = updatedValue + currentSegment.replace(toReplace, replacement);
228                         segmentStart = pos;
229                     }
230                 }
231                 inQuotes = !inQuotes;
232             }
233             // else the char just becomes part of the current segment
234         }
235         return updatedValue;
236     }
237 
    // Scratch XPath parser shared by all instances; addIndexInfo overwrites it via set(path)
    // on every call. NOTE(review): being static and mutable, it looks unsafe for concurrent
    // conversions; confirm before running more than one converter at a time.
    static XPathParts parts = new XPathParts();
239 
addIndexInfo(PrintWriter index, String path, String transID)240     private String addIndexInfo(PrintWriter index, String path, String transID) {
241         parts.set(path);
242         Map<String, String> attributes = parts.findAttributes("transform");
243         if (attributes == null) return null; // error, not a transform file
244         String source = attributes.get("source");
245         String target = attributes.get("target");
246         String variant = attributes.get("variant");
247         String direction = attributes.get("direction");
248         String alias = attributes.get("alias");
249         String backwardAlias = attributes.get("backwardAlias");
250         String visibility = attributes.get("visibility");
251 
252         String status = "internal".equals(visibility) ? "internal" : "file";
253 
254         fileCount++;
255 
256         String id = source + "-" + target;
257         String rid = target + "-" + source;
258         String filename = source + "_" + target;
259         if (variant != null) {
260             id += "/" + variant;
261             rid += "/" + variant;
262             filename += "_" + variant;
263         }
264         filename += ".txt";
265 
266         if (direction.equals("both") || direction.equals("forward")) {
267             if (verbose) {
268                 System.out.println("    " + id + "    " + filename + "    " + "FORWARD");
269             }
270             if (alias != null) {
271                 for (String ali : alias.trim().split("\\s+")) {
272                     addAlias(index, ali, id);
273                 }
274             }
275             index.println("        " + id + " {");
276             index.println("            " + status + " {");
277             index.println("                resource:process(transliterator) {\"" + filename + "\"}");
278             index.println("                direction {\"FORWARD\"}");
279             index.println("            }");
280             index.println("        }");
281         }
282         if (direction.equals("both") || direction.equals("backward")) {
283             if (verbose) {
284                 System.out.println("    " + rid + "    " + filename + "    " + "REVERSE");
285             }
286             if (backwardAlias != null) {
287                 for (String bali : backwardAlias.trim().split("\\s+")) {
288                     addAlias(index, bali, rid);
289                 }
290             }
291             index.println("        " + rid + " {");
292             index.println("            " + status + " {");
293             index.println("                resource:process(transliterator) {\"" + filename + "\"}");
294             index.println("                direction {\"REVERSE\"}");
295             index.println("            }");
296             index.println("        }");
297         }
298         index.println();
299         return filename;
300     }
301 
addAlias(PrintWriter index, String aliasSource, String aliasTarget, String aliasVariant, String originalSource, String originalTarget, String originalVariant)302     void addAlias(PrintWriter index, String aliasSource, String aliasTarget, String aliasVariant,
303         String originalSource, String originalTarget, String originalVariant) {
304         // Spacedhan-Han {
305         // alias {"null"}
306         // }
307         addAlias(index, getName(aliasSource, aliasTarget, aliasVariant),
308             getName(originalSource, originalTarget, originalVariant));
309     }
310 
addAlias(PrintWriter index, String alias, String original)311     private void addAlias(PrintWriter index, String alias, String original) {
312         index.println("        " + alias + " {");
313         index.println("            alias" + " {\"" + original + "\"}");
314         index.println("        }");
315     }
316 
getName(String source, String target, String variant)317     String getName(String source, String target, String variant) {
318         String id = source + "-" + target;
319         if (variant != null && variant.length() != 0) {
320             id += "/" + variant;
321         }
322         return id;
323     }
324 
doHeader(PrintWriter output, String quoteSymbol, String filename)325     private void doHeader(PrintWriter output, String quoteSymbol, String filename) {
326         output.print('\uFEFF');
327         output.println(quoteSymbol + " © 2016 and later: Unicode, Inc. and others.");
328         output.println(quoteSymbol + " License & terms of use: http://www.unicode.org/copyright.html#License");
329         output.println(quoteSymbol);
330         output.println(quoteSymbol + " File: " + filename);
331         output.println(quoteSymbol + " Generated from CLDR");
332         output.println(quoteSymbol);
333     }
334 
processArgs(String[] args)335     public void processArgs(String[] args) {
336         UOption.parseArgs(args, options);
337         if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
338             System.out.println(HELP_TEXT1);
339             return;
340         }
341 
342         String sourceDir = options[SOURCEDIR].value; // Utility.COMMON_DIRECTORY + "transforms/";
343         String targetDir = options[DESTDIR].value; // Utility.GEN_DIRECTORY + "main/";
344         String match = options[MATCH].value;
345         skipComments = options[SKIP_COMMENTS].doesOccur;
346         writeIndex = options[WRITE_INDEX].doesOccur;
347         verbose = options[VERBOSE].doesOccur;
348         approvedOnly = options[APPROVED_ONLY].doesOccur;
349 
350         try {
351             if (writeIndex) {
352                 throw new InternalError("writeIndex not implemented.");
353             } else {
354                 ElapsedTimer et = new ElapsedTimer();
355                 writeTransforms(sourceDir, match, targetDir + File.separator);
356                 System.out.println("ConvertTransforms: wrote " + fileCount +
357                     " files in " + et);
358             }
359         } catch (IOException ex) {
360             RuntimeException e = new RuntimeException();
361             e.initCause(ex.getCause());
362             throw e;
363         } finally {
364             System.out.println("DONE");
365         }
366     }
367 
368     // fixData ONLY NEEDED TO FIX FILE PROBLEM
369     /*
370      * private void fixData(String inputDirectory, String matchingPattern, String outputDirectory) throws IOException {
371      * File dir = new File(inputDirectory);
372      * File[] files = dir.listFiles();
373      * for (int i = 0; i < files.length; ++i) {
374      * if (files[i].isDirectory()) continue;
375      * BufferedReader input = FileUtilities.openUTF8Reader("", files[i].getCanonicalPath());
376      * PrintWriter output = FileUtilities.openUTF8Writer("", outputDirectory + files[i].getName());
377      * while (true) {
378      * String line = input.readLine();
379      * if (line == null) break;
380      * if (line.indexOf("DOCTYPE") >= 0) {
381      * line = line.replaceAll(" ldml ", " supplementalData ");
382      * }
383      * output.println(line);
384      * }
385      * input.close();
386      * output.close();
387      * }
388      * }
389      */
390 
391 }
392