/*
 * ModifyCase.java
 *
 * Created on November 29, 2006, 12:53 PM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

package org.unicode.cldr.tool;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.unicode.cldr.util.LDMLUtilities;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.lang.UCharacter;

/**
 * Lower-cases the data selected by one or more xpaths and writes only the modified data to a
 * destination folder. Run CLDRModify afterwards to merge that output with the original data,
 * thereby lower-casing the CLDR source.
 *
 * <p>A value is left untouched (and reported on stderr) when it contains an upper-case letter
 * after its first character, so in practice only the leading letter is changed.
 *
 * <p>TODO : handling of multiple xpaths is not fully working - where elements have the same
 * parents, too many parent elements get written.
 *
 * <p>Known limitation: xpaths are split on '/', so a predicate whose value contains '/' is not
 * supported.
 *
 * @author pn153353
 */
public class ModifyCase {
    static final int INDENT = 8;
    static BufferedWriter m_out;

    static String[] m_locales; // = {"bg", "cs", "da", "et", "el", "is", "lt", "ro", "sl", "uk"};
    static String[] m_xpaths; // = {"//ldml/localeDisplayNames/languages/language"};
    // String xpath = "//ldml/localeDisplayNames/languages/language[@type='to']";
    static String m_sourceDir; // = "/home/pn153353/pakua/CVS_unicode_latest/cldr/common/main";
    static String m_destDir; // = "/home/pn153353/CLDR/BUGS/casing_1177/src";

    /**
     * Locale of the file currently being written; recorded by {@link #openLDML} so that lower-casing
     * follows the rules of the data's language (e.g. Turkish dotted/dotless i) instead of the JVM
     * default locale. Null until openLDML has been called.
     */
    private static Locale m_caseLocale;

    /** Names of the elements that can carry a casing attribute. */
    private static final Set<String> ELEMENTS_WITH_CASING_ATTRIBUTE = new HashSet<String>(
        Arrays.asList(new String[] {
            "languages", "scripts", "territories", "variants",
            "keys", "types", "measurementSystemNames", "monthWidth",
            "dayWidth", "quarterWidth", "long" /* tz */, "fields", "currency" }));

    /** Matches one attribute predicate of an xpath step: [@name='value'] or [@name="value"]. */
    private static final Pattern ATTRIBUTE_PREDICATE =
        Pattern.compile("\\[@([^=\\]]+)=(['\"])(.*?)\\2\\]");

    /** Creates a new instance of ModifyCase */
    public ModifyCase() {
    }

    private static final int HELP1 = 0,
        HELP2 = 1,
        DESTDIR = 2,
        LOCALES = 3,
        SOURCEDIR = 4,
        XPATHS = 5;

    private static final UOption[] options = {
        UOption.HELP_H(),
        UOption.HELP_QUESTION_MARK(),
        UOption.create("dest", 'd', UOption.REQUIRES_ARG),
        UOption.create("locales", 'l', UOption.REQUIRES_ARG),
        UOption.create("source", 's', UOption.REQUIRES_ARG),
        UOption.create("xpaths", 'x', UOption.REQUIRES_ARG),
    };

    /**
     * Command line entry point: for every requested locale, parses the source file and writes a
     * destination file holding only the lower-cased data selected by the requested xpaths.
     *
     * @param args command line arguments, see {@link #usage()}
     */
    public static void main(String[] args) {
        UOption.parseArgs(args, options);
        if (!processArgs()) {
            return;
        }

        for (int i = 0; i < m_locales.length; i++) {
            String locale = m_locales[i];
            System.err.println("Locale : " + locale);
            String srcfile = m_sourceDir + "/" + locale + ".xml";
            String destfile = m_destDir + "/" + locale + ".xml";
            Document doc = LDMLUtilities.parse(srcfile, false);
            if (doc == null) {
                System.err.println("Unable to parse " + srcfile + " - skipping");
                continue;
            }

            BufferedWriter writer = null;
            try {
                // The XML declaration written by openLDML promises UTF-8, so the bytes must be
                // UTF-8 whatever the platform default charset is.
                writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(destfile), "UTF-8"));
                m_out = writer;
                openLDML(locale, doc);

                for (int j = 0; j < m_xpaths.length; j++) {
                    makeLowerCase(doc, m_xpaths[j]);
                }
                closeLDML();
            } catch (IOException e) {
                System.err.println("Unable to write " + destfile + " : " + e);
            } finally {
                // No-op when closeLDML() already closed the writer; releases the file handle when
                // a runtime exception interrupted the processing of this locale.
                closeQuietly(writer);
            }
        }
    }

    /** Prints the command line help to stderr. */
    private static void usage() {
        System.err.println("org.unicode.cldr.tool.ModifyCase allows the casing of the first letter to be changed");
        System.err
            .println("The output is just the data category which has changed. Run CLDRModify to merge with source");
        System.err.println("-d : specify dest dir (must exist) where resulting modified data is written");
        System.err.println("-l : specify comma separated list of LDML locales to be changed");
        System.err.println("-s : specify src dir of LDML data to be modified");
        System.err.println("-x : specify comma separated list of xpaths to data to be modified");
        System.err
            .println("Example : ModifyCase -d /dest -s /cldr/comon/main -l bg,en,it,fr -x //ldml/localeDisplayNames/languages/language");
    }

    /**
     * Copies the parsed command line options into the static configuration fields.
     *
     * @return false (after printing the usage) when help was requested or a required option is
     *         missing, true otherwise
     */
    private static boolean processArgs() {
        if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
            usage();
            return false;
        }
        if (options[DESTDIR].value == null || options[LOCALES].value == null
            || options[SOURCEDIR].value == null || options[XPATHS].value == null) {
            usage();
            return false;
        }

        m_destDir = options[DESTDIR].value;
        m_locales = options[LOCALES].value.split(",");
        m_sourceDir = options[SOURCEDIR].value;
        m_xpaths = options[XPATHS].value.split(",");
        return true;
    }

    /**
     * Writes the XML declaration, the DOCTYPE, the opening ldml tag and the identity element to
     * {@link #m_out}. Also records the locale so that subsequent calls to {@link #makeLowerCase}
     * lower-case with the rules of that locale.
     *
     * <p>I/O failures are reported on stderr rather than thrown.
     *
     * @param locale locale id with '_' separated subtags, e.g. "bg", "en_US" or "sr_Cyrl_RS"
     * @param doc parsed source document from which the version number and generation date are read
     */
    public static void openLDML(String locale, Document doc) {
        String[] parts = locale.split("_");
        String language = parts[0];
        String script = null;
        String territory = null;
        String variant = null;
        for (int i = 1; i < parts.length; i++) {
            String part = parts[i];
            if (i == 1 && part.length() == 4 && isAsciiLetters(part)) {
                // a four letter second subtag is a script (sr_Cyrl), not a territory
                script = part;
            } else if (territory == null && variant == null
                && ((part.length() == 2 && isAsciiLetters(part))
                    || (part.length() == 3 && isAsciiDigits(part)))) {
                territory = part;
            } else if (part.length() > 0) {
                variant = (variant == null) ? part : variant + "_" + part;
            }
        }
        m_caseLocale = (territory == null) ? new Locale(language) : new Locale(language, territory);

        try {
            m_out.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
            m_out.write("<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/1.5/ldml.dtd\">\n");
            m_out.write("<ldml>\n");
            indent(INDENT);
            m_out.write("<identity>\n");
            writeIdentityElement("version", "number",
                nodeValue(LDMLUtilities.getNode(doc, "//ldml/identity/version/@number")));
            writeIdentityElement("generation", "date",
                nodeValue(LDMLUtilities.getNode(doc, "//ldml/identity/generation/@date")));
            writeIdentityElement("language", "type", language);
            if (script != null) {
                writeIdentityElement("script", "type", script);
            }
            if (territory != null) {
                writeIdentityElement("territory", "type", territory);
            }
            if (variant != null) {
                writeIdentityElement("variant", "type", variant);
            }
            indent(INDENT);
            m_out.write("</identity>\n");
        } catch (IOException e) {
            System.err.println("Unable to write LDML header for " + locale + " : " + e);
        }
    }

    /**
     * Writes the parent elements named by the xpath, then every node selected by the xpath with its
     * value lower-cased, then the closing tags of the parent elements.
     *
     * <p>A value containing an upper-case letter after its first character is written unchanged and
     * reported on stderr. Attribute predicates on parent steps ([@type='x']) are written as
     * attributes of the parent element; the attributes of the selected element itself are copied
     * from the source node. I/O failures are reported on stderr rather than thrown.
     *
     * @param doc parsed source document
     * @param xpath xpath of the elements to convert; must contain "//ldml/" followed by at least
     *        one element name, otherwise it is reported and ignored
     */
    public static void makeLowerCase(Document doc, String xpath) {
        final String prefix = "//ldml/";
        int prefixAt = xpath.indexOf(prefix);
        if (prefixAt < 0) {
            System.err.println("Ignoring xpath which does not contain " + prefix + " : " + xpath);
            return;
        }

        // remove //ldml prefix and split into steps
        String[] parts = xpath.substring(prefixAt + prefix.length()).split("/");
        if (parts.length == 0) {
            System.err.println("Ignoring xpath without an element below //ldml : " + xpath);
            return;
        }
        String[] names = new String[parts.length];
        for (int i = 0; i < parts.length; i++) {
            names[i] = elementName(parts[i]);
            if (names[i].length() == 0) {
                System.err.println("Ignoring xpath with an empty step : " + xpath);
                return;
            }
        }
        String leaf = names[names.length - 1];

        try {
            for (int i = 0; i < parts.length - 1; i++) {
                String attributes = predicateAttributes(parts[i]);
                indent(INDENT * (i + 1));
                m_out.write("<" + names[i] + attributes);
                // never emit casing twice when the xpath predicate already supplied it
                if (addCasingAttribute(names[i]) && attributes.indexOf(" casing=") < 0) {
                    m_out.write(" casing=\"lowercase-words\"");
                }
                m_out.write(">\n");
            }

            Node[] nodes = LDMLUtilities.getNodeListAsArray(doc, xpath);
            if (nodes == null) { // just changing a single element
                // not tested, this may not work !
                nodes = new Node[] { LDMLUtilities.getNode(doc, xpath) };
            }
            for (int j = 0; j < nodes.length; j++) {
                if (nodes[j] != null) {
                    writeConvertedNode(nodes[j], leaf, parts.length);
                }
            }

            for (int i = parts.length - 2; i >= 0; i--) {
                indent(INDENT * (i + 1));
                m_out.write("</" + names[i] + ">\n");
            }
        } catch (IOException e) {
            System.err.println("Unable to write data for " + xpath + " : " + e);
        }
    }

    /**
     * Writes the closing ldml tag and closes {@link #m_out}. The writer is closed even when the
     * final write fails; I/O failures are reported on stderr rather than thrown.
     */
    public static void closeLDML() {
        try {
            try {
                m_out.write("</ldml>\n");
            } finally {
                m_out.close();
            }
        } catch (IOException e) {
            System.err.println("Unable to finish LDML output : " + e);
        }
    }

    /** Writes one element of the given name with its (possibly lower-cased) value and attributes. */
    private static void writeConvertedNode(Node node, String elementName, int depth) throws IOException {
        String value = nodeValue(node);
        NamedNodeMap attributes = node.getAttributes(); // null for non-element nodes

        if (hasUpperCaseAfterFirst(value)) {
            // don't convert where an upper case is found mid sentence
            Node typeNode = (attributes == null) ? null : attributes.getNamedItem("type");
            String type = (typeNode == null) ? "(no type)" : typeNode.getNodeValue();
            System.err.println("Skipping conversion of : " + type + " " + value);
        } else if (m_caseLocale != null) {
            value = value.toLowerCase(m_caseLocale);
        } else {
            // openLDML was never called, so the data locale is unknown
            value = value.toLowerCase();
        }

        indent(INDENT * depth);
        m_out.write("<" + elementName);
        if (attributes != null) {
            for (int k = 0; k < attributes.getLength(); k++) {
                Node attribute = attributes.item(k);
                m_out.write(" " + attribute.getNodeName() + "=\"" + escapeXml(attribute.getNodeValue()) + "\"");
            }
        }
        m_out.write(">" + escapeXml(value) + "</" + elementName + ">\n");
    }

    /** Writes one empty identity child element, e.g. &lt;language type="bg"/&gt;. */
    private static void writeIdentityElement(String element, String attribute, String value)
        throws IOException {
        indent(INDENT * 2);
        m_out.write("<" + element + " " + attribute + "=\"" + escapeXml(value) + "\"/>\n");
    }

    /** Returns true when any code point after the first one is upper case. */
    private static boolean hasUpperCaseAfterFirst(String value) {
        if (value.length() == 0) {
            return false;
        }
        // step by code point so that a surrogate pair is never examined in halves
        int k = Character.charCount(value.codePointAt(0)); // skip first char
        while (k < value.length()) {
            int c = value.codePointAt(k);
            if (UCharacter.isUUppercase(c)) {
                return true;
            }
            k += Character.charCount(c);
        }
        return false;
    }

    /** Returns the value of the node, or an empty string when the node or its value is missing. */
    private static String nodeValue(Node node) {
        if (node == null) {
            return "";
        }
        String value = LDMLUtilities.getNodeValue(node);
        return (value == null) ? "" : value;
    }

    /** Returns the element name of an xpath step, i.e. the step without its predicates. */
    private static String elementName(String step) {
        int bracket = step.indexOf('[');
        return (bracket < 0) ? step : step.substring(0, bracket);
    }

    /** Converts the attribute predicates of an xpath step to XML attribute text (' name="value"'). */
    private static String predicateAttributes(String step) {
        StringBuilder result = new StringBuilder();
        Matcher matcher = ATTRIBUTE_PREDICATE.matcher(step);
        while (matcher.find()) {
            result.append(' ').append(matcher.group(1).trim())
                .append("=\"").append(escapeXml(matcher.group(3))).append('"');
        }
        return result.toString();
    }

    /** Escapes the characters which are not allowed in XML text or in a double quoted attribute. */
    private static String escapeXml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder result = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                result.append("&amp;");
                break;
            case '<':
                result.append("&lt;");
                break;
            case '>':
                result.append("&gt;");
                break;
            case '"':
                result.append("&quot;");
                break;
            default:
                result.append(c);
                break;
            }
        }
        return result.toString();
    }

    /** Returns true when the text consists of ASCII letters only. */
    private static boolean isAsciiLetters(String text) {
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
                return false;
            }
        }
        return true;
    }

    /** Returns true when the text consists of ASCII digits only. */
    private static boolean isAsciiDigits(String text) {
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /** Closes the writer, ignoring failures; used only for cleanup after the real work is done. */
    private static void closeQuietly(BufferedWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException ignored) {
            // cleanup only: any failure that matters was already reported by closeLDML()
        }
    }

    /** Writes n spaces to {@link #m_out}. */
    private static void indent(int n) throws IOException {
        StringBuilder spaces = new StringBuilder(Math.max(n, 0));
        for (int i = 0; i < n; i++) {
            spaces.append(' ');
        }
        m_out.write(spaces.toString());
    }

    /* checks if the element can have a casing attribute */
    private static boolean addCasingAttribute(String element) {
        return ELEMENTS_WITH_CASING_ATTRIBUTE.contains(element);
    }
}