• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.icu;
2 
3 import java.io.BufferedWriter;
4 import java.io.ByteArrayOutputStream;
5 import java.io.File;
6 import java.io.IOException;
7 import java.io.OutputStreamWriter;
8 import java.io.PrintWriter;
9 import java.util.Iterator;
10 import java.util.Map;
11 import java.util.Set;
12 
13 import org.unicode.cldr.ant.CLDRConverterTool;
14 import org.unicode.cldr.draft.FileUtilities;
15 import org.unicode.cldr.util.CLDRFile;
16 import org.unicode.cldr.util.CLDRFile.DraftStatus;
17 import org.unicode.cldr.util.CLDRPaths;
18 import org.unicode.cldr.util.CldrUtility;
19 import org.unicode.cldr.util.Factory;
20 import org.unicode.cldr.util.XPathParts;
21 
22 import com.ibm.icu.dev.tool.UOption;
23 import com.ibm.icu.dev.util.ElapsedTimer;
24 import com.ibm.icu.text.Transliterator;
25 
26 /**
27  * Utility to generate the Transliteration resource bundle files.
28  */
29 public class ConvertTransforms extends CLDRConverterTool {
30 
31     private static final int HELP1 = 0,
32         HELP2 = 1,
33         SOURCEDIR = 2,
34         DESTDIR = 3,
35         MATCH = 4,
36         SKIP_COMMENTS = 5,
37         WRITE_INDEX = 6,
38         VERBOSE = 7,
39         APPROVED_ONLY = 8;
40 
41     private static final UOption[] options = {
42         UOption.HELP_H(),
43         UOption.HELP_QUESTION_MARK(),
44         UOption.SOURCEDIR().setDefault(CLDRPaths.COMMON_DIRECTORY + "transforms/"),
45         UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "icu-transforms/"),
46         UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
47         UOption.create("commentSkip", 'c', UOption.NO_ARG),
48         UOption.create("writeIndex", 'x', UOption.NO_ARG),
49         UOption.VERBOSE(),
50         UOption.create("approvedOnly", 'a', UOption.NO_ARG),
51     };
52 
53     static final String HELP_TEXT1 = "Use the following options" + XPathParts.NEWLINE
54         + "-h or -?\t for this message" + XPathParts.NEWLINE
55         + "-" + options[SOURCEDIR].shortName + "\t source directory. Default = -s"
56         + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY) + XPathParts.NEWLINE
57         + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" + XPathParts.NEWLINE
58         + "-" + options[DESTDIR].shortName + "\t destination directory. Default = -d"
59         + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/") + XPathParts.NEWLINE
60         + "-m<regex>\t to restrict the files to what matches <regex>" + XPathParts.NEWLINE
61         + "-a\t to only include transforms with approved/contributed status" + XPathParts.NEWLINE
62     // "--writeIndex / -x   to write the index (trnsfiles.mk)"+ XPathParts.NEWLINE
63     ;
64 
65     // TODO add options to set input and output directories, matching pattern
main(String[] args)66     public static void main(String[] args) throws Exception {
67         ConvertTransforms ct = new ConvertTransforms();
68         ct.processArgs(args);
69     }
70 
71     private boolean skipComments;
72     private boolean writeIndex = false;
73     private boolean verbose = false;
74     private boolean approvedOnly = false;
75 
76     int fileCount = 0;
77 
writeTransforms(String inputDirectory, String matchingPattern, String outputDirectory)78     public void writeTransforms(String inputDirectory, String matchingPattern, String outputDirectory)
79         throws IOException {
80         System.out.println(new File(inputDirectory).getCanonicalPath());
81         Factory cldrFactory = (approvedOnly) ? Factory.make(inputDirectory, matchingPattern, DraftStatus.contributed)
82             : Factory.make(inputDirectory, matchingPattern);
83         Set<String> ids = cldrFactory.getAvailable();
84         PrintWriter index = FileUtilities.openUTF8Writer(outputDirectory, "root.txt");
85         doHeader(index, "//", "root.txt");
86         try {
87             index.println("root {");
88             index.println("    RuleBasedTransliteratorIDs {");
89             // addAlias(index, "Latin", "el", "", "Latin", "Greek", "UNGEGN");
90             // addAlias(index, "el", "Latin", "", "Greek", "Latin", "UNGEGN");
91             // addAlias(index, "Latin", "Jamo", "", "Latin", "ConjoiningJamo", "");
92             addAlias(index, "Tone", "Digit", "", "Pinyin", "NumericPinyin", "");
93             addAlias(index, "Digit", "Tone", "", "NumericPinyin", "Pinyin", "");
94             // addAlias(index, "Simplified", "Traditional", "", "Hans", "Hant", "");
95             // addAlias(index, "Traditional", "Simplified", "", "Hant", "Hans", "");
96             for (String id : ids) {
97                 if (id.equals("All")) continue;
98                 try {
99                     convertFile(cldrFactory, id, outputDirectory, index);
100                 } catch (IOException e) {
101                     System.err.println("Failure in: " + id);
102                     throw e;
103                 }
104             }
105             index.println("    }");
106             index.println("    TransliteratorNamePattern {");
107             index.println("        // Format for the display name of a Transliterator.");
108             index.println("        // This is the language-neutral form of this resource.");
109             index.println("        \"{0,choice,0#|1#{1}|2#{1}-{2}}\" // Display name");
110             index.println("    }");
111             index.println("    // Transliterator display names");
112             index.println("    // This is the English form of this resource.");
113             index.println("    \"%Translit%Hex\"         { \"%Translit%Hex\" }");
114             index.println("    \"%Translit%UnicodeName\" { \"%Translit%UnicodeName\" }");
115             index.println("    \"%Translit%UnicodeChar\" { \"%Translit%UnicodeChar\" }");
116             index.println("    TransliterateLATIN{        ");
117             index.println("    \"\",");
118             index.println("    \"\"");
119             index.println("    }");
120             index.println("}");
121         } finally {
122             index.close();
123         }
124     }
125 
makePrintWriter(ByteArrayOutputStream bytes)126     public static PrintWriter makePrintWriter(ByteArrayOutputStream bytes) {
127         try {
128             OutputStreamWriter outStream = new OutputStreamWriter(bytes, "UTF-8");
129             BufferedWriter buff = new BufferedWriter(outStream, 4 * 1024);
130             PrintWriter p = new PrintWriter(buff);
131 
132             return p;
133         } catch (Exception e) {
134             System.err.println("Error: Could not create OutputStreamWriter.");
135         }
136         return null;
137     }
138 
showComments(PrintWriter toilet, String value)139     private void showComments(PrintWriter toilet, String value) {
140         String[] lines = value.trim().split("\\r\\n?|\\n");
141         for (String line : lines) {
142             if (!line.startsWith("#")) {
143                 line = "# " + line;
144             }
145             toilet.println(line);
146         }
147     }
148 
convertFile(Factory cldrFactory, String id, String outputDirectory, PrintWriter index)149     private void convertFile(Factory cldrFactory, String id, String outputDirectory, PrintWriter index)
150         throws IOException {
151         PrintWriter output = null;
152         String filename = null;
153         CLDRFile cldrFile = cldrFactory.make(id, false);
154         boolean first = true;
155         for (Iterator<String> it = cldrFile.iterator("", cldrFile.getComparator()); it.hasNext();) {
156             String path = it.next();
157             if (path.indexOf("/version") >= 0 || path.indexOf("/generation") >= 0) {
158                 continue;
159             }
160             String value = cldrFile.getStringValue(path);
161             if (first) {
162                 String fullPath = cldrFile.getFullXPath(path);
163                 filename = addIndexInfo(index, fullPath);
164                 if (filename == null) {
165                     return; // not a transform file!
166                 }
167                 output = FileUtilities.openUTF8Writer(outputDirectory, filename);
168                 doHeader(output, "#", filename);
169                 first = false;
170             }
171             if (path.indexOf("/comment") >= 0) {
172                 if (!skipComments) {
173                     showComments(output, value);
174                 }
175             } else if (path.indexOf("/tRule") >= 0) {
176                 value = fixup.transliterate(value);
177                 value = value.replaceAll(CldrUtility.LINE_SEPARATOR, System.lineSeparator());
178                 output.println(value);
179             } else {
180                 throw new IllegalArgumentException("Unknown element: " + path + "\t " + value);
181             }
182         }
183         if (output != null) { // null for transforms whose draft status is too low
184             output.close();
185         }
186     }
187 
188     public static final Transliterator fixup = Transliterator.getInstance("[:Mn:]any-hex/java");
189 
replaceUnquoted(String value, String toReplace, String replacement)190     public static String replaceUnquoted(String value, String toReplace, String replacement) {
191         // quick exit in most cases
192         if (value.indexOf(toReplace) < 0)
193             return value;
194 
195         String updatedValue = "";
196         int segmentStart = 0;
197         boolean inQuotes = false;
198         boolean ignoreCharValue = false;
199         int length = value.length();
200 
201         for (int pos = 0; pos < length; ++pos) {
202             char curChar = (char) 0;
203 
204             if (ignoreCharValue) {
205                 ignoreCharValue = false;
206             } else {
207                 curChar = value.charAt(pos);
208             }
209 
210             if (curChar == '\\') {
211                 // escape, ignore the value of the next char (actually the next UTF16 code unit, but that works here)
212                 ignoreCharValue = true;
213             }
214             boolean isLastChar = (pos + 1 >= length);
215             if (curChar == '\'' || isLastChar) {
216                 // quote, begin or end of a quoted literal (in which no replacement takes place)
217                 if (inQuotes) {
218                     // End of a quoted segment; guaranteed to include at least opening quote.
219                     // Just add the segment (including current char) to updatedValue.
220                     updatedValue = updatedValue + value.substring(segmentStart, pos + 1);
221                     segmentStart = pos + 1;
222                 } else {
223                     if (isLastChar)
224                         ++pos;
225                     if (pos > segmentStart) {
226                         // End of a nonempty unquoted segment; perform requested replacements and
227                         // then add segment to updatedValue.
228                         String currentSegment = value.substring(segmentStart, pos);
229                         updatedValue = updatedValue + currentSegment.replace(toReplace, replacement);
230                         segmentStart = pos;
231                     }
232                 }
233                 inQuotes = !inQuotes;
234             }
235             // else the char just becomes part of the current segment
236         }
237         return updatedValue;
238     }
239 
addIndexInfo(PrintWriter index, String path)240     private String addIndexInfo(PrintWriter index, String path) {
241         XPathParts parts = XPathParts.getFrozenInstance(path);
242         Map<String, String> attributes = parts.findAttributes("transform");
243         if (attributes == null) return null; // error, not a transform file
244         String source = attributes.get("source");
245         String target = attributes.get("target");
246         String variant = attributes.get("variant");
247         String direction = attributes.get("direction");
248         String alias = attributes.get("alias");
249         String backwardAlias = attributes.get("backwardAlias");
250         String visibility = attributes.get("visibility");
251 
252         String status = "internal".equals(visibility) ? "internal" : "file";
253 
254         fileCount++;
255 
256         String id = source + "-" + target;
257         String rid = target + "-" + source;
258         String filename = source + "_" + target;
259         if (variant != null) {
260             id += "/" + variant;
261             rid += "/" + variant;
262             filename += "_" + variant;
263         }
264         filename += ".txt";
265 
266         if (direction.equals("both") || direction.equals("forward")) {
267             if (verbose) {
268                 System.out.println("    " + id + "    " + filename + "    " + "FORWARD");
269             }
270             if (alias != null) {
271                 for (String ali : alias.trim().split("\\s+")) {
272                     addAlias(index, ali, id);
273                 }
274             }
275             index.println("        " + id + " {");
276             index.println("            " + status + " {");
277             index.println("                resource:process(transliterator) {\"" + filename + "\"}");
278             index.println("                direction {\"FORWARD\"}");
279             index.println("            }");
280             index.println("        }");
281         }
282         if (direction.equals("both") || direction.equals("backward")) {
283             if (verbose) {
284                 System.out.println("    " + rid + "    " + filename + "    " + "REVERSE");
285             }
286             if (backwardAlias != null) {
287                 for (String bali : backwardAlias.trim().split("\\s+")) {
288                     addAlias(index, bali, rid);
289                 }
290             }
291             index.println("        " + rid + " {");
292             index.println("            " + status + " {");
293             index.println("                resource:process(transliterator) {\"" + filename + "\"}");
294             index.println("                direction {\"REVERSE\"}");
295             index.println("            }");
296             index.println("        }");
297         }
298         index.println();
299         return filename;
300     }
301 
addAlias(PrintWriter index, String aliasSource, String aliasTarget, String aliasVariant, String originalSource, String originalTarget, String originalVariant)302     void addAlias(PrintWriter index, String aliasSource, String aliasTarget, String aliasVariant,
303         String originalSource, String originalTarget, String originalVariant) {
304         // Spacedhan-Han {
305         // alias {"null"}
306         // }
307         addAlias(index, getName(aliasSource, aliasTarget, aliasVariant),
308             getName(originalSource, originalTarget, originalVariant));
309     }
310 
addAlias(PrintWriter index, String alias, String original)311     private void addAlias(PrintWriter index, String alias, String original) {
312         index.println("        " + alias + " {");
313         index.println("            alias" + " {\"" + original + "\"}");
314         index.println("        }");
315     }
316 
getName(String source, String target, String variant)317     String getName(String source, String target, String variant) {
318         String id = source + "-" + target;
319         if (variant != null && variant.length() != 0) {
320             id += "/" + variant;
321         }
322         return id;
323     }
324 
doHeader(PrintWriter output, String quoteSymbol, String filename)325     private void doHeader(PrintWriter output, String quoteSymbol, String filename) {
326         output.print('\uFEFF');
327         output.println(quoteSymbol + " © 2016 and later: Unicode, Inc. and others.");
328         output.println(quoteSymbol + " License & terms of use: http://www.unicode.org/copyright.html#License");
329         output.println(quoteSymbol);
330         output.println(quoteSymbol + " File: " + filename);
331         output.println(quoteSymbol + " Generated from CLDR");
332         output.println(quoteSymbol);
333     }
334 
processArgs(String[] args)335     public void processArgs(String[] args) {
336         UOption.parseArgs(args, options);
337         if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
338             System.out.println(HELP_TEXT1);
339             return;
340         }
341 
342         String sourceDir = options[SOURCEDIR].value; // Utility.COMMON_DIRECTORY + "transforms/";
343         String targetDir = options[DESTDIR].value; // Utility.GEN_DIRECTORY + "main/";
344         String match = options[MATCH].value;
345         skipComments = options[SKIP_COMMENTS].doesOccur;
346         writeIndex = options[WRITE_INDEX].doesOccur;
347         verbose = options[VERBOSE].doesOccur;
348         approvedOnly = options[APPROVED_ONLY].doesOccur;
349 
350         try {
351             if (writeIndex) {
352                 throw new InternalError("writeIndex not implemented.");
353             } else {
354                 ElapsedTimer et = new ElapsedTimer();
355                 writeTransforms(sourceDir, match, targetDir + File.separator);
356                 System.out.println("ConvertTransforms: wrote " + fileCount +
357                     " files in " + et);
358             }
359         } catch (IOException ex) {
360             RuntimeException e = new RuntimeException();
361             e.initCause(ex.getCause());
362             throw e;
363         } finally {
364             System.out.println("DONE");
365         }
366     }
367 
368     // fixData ONLY NEEDED TO FIX FILE PROBLEM
369     /*
370      * private void fixData(String inputDirectory, String matchingPattern, String outputDirectory) throws IOException {
371      * File dir = new File(inputDirectory);
372      * File[] files = dir.listFiles();
373      * for (int i = 0; i < files.length; ++i) {
374      * if (files[i].isDirectory()) continue;
375      * BufferedReader input = FileUtilities.openUTF8Reader("", files[i].getCanonicalPath());
376      * PrintWriter output = FileUtilities.openUTF8Writer("", outputDirectory + files[i].getName());
377      * while (true) {
378      * String line = input.readLine();
379      * if (line == null) break;
380      * if (line.indexOf("DOCTYPE") >= 0) {
381      * line = line.replaceAll(" ldml ", " supplementalData ");
382      * }
383      * output.println(line);
384      * }
385      * input.close();
386      * output.close();
387      * }
388      * }
389      */
390 
391 }
392