• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * ModifyCase.java
3  *
4  * Created on November 29, 2006, 12:53 PM
5  *
6  * To change this template, choose Tools | Template Manager
7  * and open the template in the editor.
8  */
9 
10 package org.unicode.cldr.tool;
11 
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.unicode.cldr.util.LDMLUtilities;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.lang.UCharacter;
23 
24 /**
25  *
26  * @author pn153353
27  *
28  *         class will lower case data specified by an xpath and output the modified data only to a destination folder
29  *         then use CLDRModify to merge this output with the originasl data, thereby lower casing the CLDR source
30  *
31  *         TODO : handling of multiple xpaths not fully working - where elements have same parents - too amny parent
32  *         elements get written
33  */
34 public class ModifyCase {
35     static final int INDENT = 8;
36     static BufferedWriter m_out;
37 
38     static String[] m_locales; // = {"bg", "cs", "da", "et", "el", "is", "lt", "ro", "sl", "uk"};
39     static String[] m_xpaths; // = {"//ldml/localeDisplayNames/languages/language"};
40     // String xpath = "//ldml/localeDisplayNames/languages/language[@type='to']";
41     static String m_sourceDir; // = "/home/pn153353/pakua/CVS_unicode_latest/cldr/common/main";
42     static String m_destDir; // = "/home/pn153353/CLDR/BUGS/casing_1177/src";
43 
44     /** Creates a new instance of ModifyCase */
ModifyCase()45     public ModifyCase() {
46     }
47 
48     private static final int HELP1 = 0,
49         HELP2 = 1,
50         DESTDIR = 2,
51         LOCALES = 3,
52         SOURCEDIR = 4,
53         XPATHS = 5;
54 
55     private static final UOption[] options = {
56         UOption.HELP_H(),
57         UOption.HELP_QUESTION_MARK(),
58         UOption.create("dest", 'd', UOption.REQUIRES_ARG),
59         UOption.create("locales", 'l', UOption.REQUIRES_ARG),
60         UOption.create("source", 's', UOption.REQUIRES_ARG),
61         UOption.create("xpaths", 'x', UOption.REQUIRES_ARG),
62     };
63 
main(String[] args)64     public static void main(String[] args) {
65         UOption.parseArgs(args, options);
66         if (processArgs() == false)
67             return;
68 
69         for (int i = 0; i < m_locales.length; i++) {
70             System.err.println("Locale : " + m_locales[i]);
71             String srcfile = m_sourceDir + "/" + m_locales[i] + ".xml";
72             String destfile = m_destDir + "/" + m_locales[i] + ".xml";
73             Document doc = LDMLUtilities.parse(srcfile, false);
74             if (doc == null)
75                 continue;
76             try {
77                 m_out = new BufferedWriter(new FileWriter(destfile));
78                 openLDML(m_locales[i], doc);
79 
80                 for (int j = 0; j < m_xpaths.length; j++) {
81                     makeLowerCase(doc, m_xpaths[j]);
82                 }
83                 closeLDML();
84             } catch (IOException e) {
85             }
86         }
87     }
88 
usage()89     private static void usage() {
90         System.err.println("org.unicode.cldr.tool.ModifyCase allows the casing of the first letter to be changed");
91         System.err
92             .println("The output is just the data category which has changed. Run CLDRModify to merge with source");
93         System.err.println("-d : specify dest dir (must exist) where resulting modified data is written");
94         System.err.println("-l : specify comma separated list of LDML locales to be changed");
95         System.err.println("-s : specify src dir of LDML data to be modified");
96         System.err.println("-x : specify comma separated list of xpaths to data to be modified");
97         System.err
98             .println("Example : ModifyCase -d /dest -s /cldr/comon/main -l bg,en,it,fr -x //ldml/localeDisplayNames/languages/language");
99     }
100 
processArgs()101     private static boolean processArgs() {
102         if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
103             usage();
104             return false;
105         }
106         if (options[DESTDIR].value == null || options[LOCALES].value == null ||
107             options[SOURCEDIR].value == null || options[XPATHS].value == null) {
108             usage();
109             return false;
110         }
111 
112         m_destDir = options[DESTDIR].value;
113         m_locales = options[LOCALES].value.split(",");
114         m_sourceDir = options[SOURCEDIR].value;
115         m_xpaths = options[XPATHS].value.split(",");
116         return true;
117     }
118 
openLDML(String locale, Document doc)119     public static void openLDML(String locale, Document doc) {
120         try {
121             m_out.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
122             m_out.write("<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/1.5/ldml.dtd\">\n");
123             m_out.write("<ldml>\n");
124             indent(INDENT);
125             m_out.write("<identity>\n");
126             Node n = LDMLUtilities.getNode(doc, "//ldml/identity/version/@number");
127             indent(INDENT * 2);
128             m_out.write("<version number=\"" + LDMLUtilities.getNodeValue(n) + "\"/>\n");
129             n = LDMLUtilities.getNode(doc, "//ldml/identity/generation/@date");
130             indent(INDENT * 2);
131             m_out.write("<generation date=\"" + LDMLUtilities.getNodeValue(n) + "\"/>\n");
132             String parts[] = locale.split("_");
133             indent(INDENT * 2);
134             m_out.write("<language type=\"" + parts[0] + "\"/>\n");
135             if (parts.length > 1) {
136                 indent(INDENT * 2);
137                 m_out.write("<territory type=\"" + parts[1] + "\"/>\n");
138             }
139             indent(INDENT);
140             m_out.write("</identity>\n");
141         } catch (IOException e) {
142         }
143     }
144 
makeLowerCase(Document doc, String xpath)145     public static void makeLowerCase(Document doc, String xpath) {
146         // parse the xpath to write the LDML
147         try {
148             // remove //ldml prefix and split
149             String path = xpath.substring(xpath.indexOf("//ldml") + 7);
150             String parts[] = path.split("/");
151             for (int i = 0; i < parts.length - 1; i++) {
152                 indent(INDENT * (i + 1));
153                 if (addCasingAttribute(parts[i]))
154                     m_out.write("<" + parts[i] + " casing=\"lowercase-words\">\n");
155                 else
156                     m_out.write("<" + parts[i] + ">\n");
157             }
158 
159             Node n[] = LDMLUtilities.getNodeListAsArray(doc, xpath);
160             if (n == null) // just changing a single element
161             { // not tested, this may not work !
162                 n = new Node[1];
163                 n[0] = LDMLUtilities.getNode(doc, xpath);
164             }
165 
166             for (int j = 0; j < n.length; j++) {
167                 if (n[j] != null) {
168                     String value = LDMLUtilities.getNodeValue(n[j]);
169                     boolean bUpperFound = false;
170                     for (int k = 1; k < value.length(); k++) // skip first char
171                     {
172                         int c = value.codePointAt(k);
173                         if (UCharacter.isUUppercase(c)) {
174                             bUpperFound = true;
175                             break;
176                         }
177                     }
178                     if (bUpperFound == true) // don't convert where an upper case is found mid sentence
179                     {
180                         NamedNodeMap map = n[j].getAttributes();
181                         Node langnode = map.getNamedItem("type");
182                         String lang = langnode.getNodeValue();
183                         System.err.println("Skipping conversion of : " + lang + "  " + value);
184                     }
185 
186                     if (bUpperFound == false) // don't convert where an upper case is found mid sentence
187                         value = value.toLowerCase();
188 
189                     indent(INDENT * parts.length);
190                     m_out.write("<" + parts[parts.length - 1]);
191 
192                     NamedNodeMap map = n[j].getAttributes();
193                     for (int k = 0; k < map.getLength(); k++) {
194                         Node node = map.item(k);
195                         m_out.write(" " + node.getNodeName() + "=\"" + node.getNodeValue() + "\"");
196                     }
197                     m_out.write(">" + value + "</" + parts[parts.length - 1] + ">\n");
198 
199                 }
200             }
201 
202             for (int i = parts.length - 2; i >= 0; i--) {
203                 indent(INDENT * (i + 1));
204                 m_out.write("</" + parts[i] + ">\n");
205             }
206         } catch (IOException e) {
207         }
208 
209         // Factory cldrFactory = Factory.make(sourceDir, ".*");
210         // boolean makeResolved = false;
211         // CLDRFile file = (CLDRFile) cldrFactory.make(locale, makeResolved).cloneAsThawed();
212         // System.err.println ("res = " + file.getStringValue
213         // ("//ldml/localeDisplayNames/languages/language[@type=\"en\"]"));
214 
215     }
216 
closeLDML()217     public static void closeLDML() {
218         try {
219             m_out.write("</ldml>\n");
220             m_out.close();
221         } catch (IOException e) {
222         }
223 
224     }
225 
indent(int n)226     private static void indent(int n) {
227         try {
228             String spaces = "";
229             for (int i = 0; i < n; i++)
230                 spaces += " ";
231             m_out.write(spaces);
232         } catch (IOException e) {
233         }
234     }
235 
236     /* checks if the element can have a casing attribute */
addCasingAttribute(String element)237     private static boolean addCasingAttribute(String element) {
238         String[] elements_with_casing_attribute = {
239             "languages", "scripts", "territories", "variants",
240             "keys", "types", "measurementSystemNames", "monthWidth",
241             "dayWidth", "quarterWidth", "long" /* tz */, "fields", "currency" };
242 
243         for (int i = 0; i < elements_with_casing_attribute.length; i++) {
244             if (element.compareTo(elements_with_casing_attribute[i]) == 0)
245                 return true;
246         }
247         return false;
248     }
249 }
250