1 package org.unicode.cldr.test; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.io.PrintWriter; 6 import java.util.ArrayList; 7 import java.util.EnumMap; 8 import java.util.HashMap; 9 import java.util.LinkedHashSet; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Set; 13 import java.util.regex.Matcher; 14 import java.util.regex.Pattern; 15 16 import org.unicode.cldr.test.CheckConsistentCasing.CasingType; 17 import org.unicode.cldr.test.CheckConsistentCasing.CasingTypeAndErrFlag; 18 import org.unicode.cldr.test.CheckConsistentCasing.Category; 19 import org.unicode.cldr.tool.Option.Options; 20 import org.unicode.cldr.util.CLDRFile; 21 import org.unicode.cldr.util.CLDRFile.WinningChoice; 22 import org.unicode.cldr.util.CLDRPaths; 23 import org.unicode.cldr.util.CldrUtility; 24 import org.unicode.cldr.util.Factory; 25 import org.unicode.cldr.util.LocaleIDParser; 26 import org.unicode.cldr.util.PatternCache; 27 import org.unicode.cldr.util.SimpleXMLSource; 28 import org.unicode.cldr.util.SupplementalDataInfo; 29 import org.unicode.cldr.util.XMLFileReader; 30 import org.unicode.cldr.util.XMLSource; 31 import org.unicode.cldr.util.XPathParts; 32 33 import com.ibm.icu.text.MessageFormat; 34 import com.ibm.icu.text.UnicodeSet; 35 36 /** 37 * Calculates, reads, writes and returns casing information about locales for 38 * CheckConsistentCasing. 39 * Run main() to generate the casing information files which will be stored in common/casing. 40 * 41 * @author jchye 42 */ 43 public class CasingInfo { 44 private static final Options options = new Options( 45 "This program is used to generate casing files for locales.") 46 .add("locales", ".*", ".*", "A regex of the locales to generate casing information for") 47 .add("summary", null, 48 "generates a summary of the casing for all locales that had casing generated for this run"); 49 private Map<String, Map<Category, CasingTypeAndErrFlag>> casing; 50 private List<File> casingDirs; 51 CasingInfo(Factory factory)52 public CasingInfo(Factory factory) { 53 casingDirs = new ArrayList<File>(); 54 for (File f : factory.getSourceDirectories()) { 55 this.casingDirs.add(new File(f.getAbsolutePath() + "/../casing")); 56 } 57 casing = CldrUtility.newConcurrentHashMap(); 58 } 59 60 /** 61 * ONLY usable in command line tests. 62 */ CasingInfo()63 public CasingInfo() { 64 casingDirs = new ArrayList<File>(); 65 this.casingDirs.add(new File(CLDRPaths.CASING_DIRECTORY)); 66 casing = CldrUtility.newConcurrentHashMap(); 67 } 68 69 /** 70 * Returns casing information to be used for a specified locale. 71 * 72 * @param localeID 73 * @return 74 */ getLocaleCasing(String localeID)75 public Map<Category, CasingTypeAndErrFlag> getLocaleCasing(String localeID) { 76 // Check if the localeID contains casing first. 77 // If there isn't a casing file available for the locale, 78 // recurse over the locale's parents until something is found. 79 if (!casing.containsKey(localeID)) { 80 // Synchronize writes to casing map in an attempt to avoid NPEs (cldrbug 5051). 81 synchronized (casing) { 82 CasingHandler handler = loadFromXml(localeID); 83 if (handler != null) { 84 handler.addParsedResult(casing); 85 } 86 if (!casing.containsKey(localeID)) { 87 String parentID = LocaleIDParser.getSimpleParent(localeID); 88 if (!parentID.equals("root")) { 89 casing.put(localeID, getLocaleCasing(parentID)); 90 } 91 } 92 } 93 } 94 95 return casing.get(localeID); 96 } 97 98 /** 99 * Loads casing information about a specified locale from the casing XML, 100 * if it exists. 101 * 102 * @param localeID 103 */ loadFromXml(String localeID)104 private CasingHandler loadFromXml(String localeID) { 105 for (File casingDir : casingDirs) { 106 File casingFile = new File(casingDir, localeID + ".xml"); 107 if (casingFile.isFile()) { 108 CasingHandler handler = new CasingHandler(); 109 XMLFileReader xfr = new XMLFileReader().setHandler(handler); 110 xfr.read(casingFile.toString(), -1, true); 111 return handler; 112 } 113 } // Fail silently if file not found. 114 return null; 115 } 116 117 /** 118 * Calculates casing information about all languages from the locale data. 119 */ generateCasingInformation(String localePattern)120 private Map<String, Boolean> generateCasingInformation(String localePattern) { 121 SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance(); 122 Set<String> defaultContentLocales = supplementalDataInfo.getDefaultContentLocales(); 123 String sourceDirectory = CldrUtility.checkValidDirectory(CLDRPaths.MAIN_DIRECTORY); 124 Factory cldrFactory = Factory.make(sourceDirectory, localePattern); 125 Set<String> locales = new LinkedHashSet<String>(cldrFactory.getAvailable()); 126 locales.removeAll(defaultContentLocales); // Skip all default content locales 127 UnicodeSet allCaps = new UnicodeSet("[:Lu:]"); 128 Map<String, Boolean> localeUsesCasing = new HashMap<String, Boolean>(); 129 LocaleIDParser parser = new LocaleIDParser(); 130 131 for (String localeID : locales) { 132 if (CLDRFile.isSupplementalName(localeID)) continue; 133 134 // We want country/script differences but not region differences 135 // (unless it's pt_PT, which we do want). 136 // Keep regional locales only if there isn't already a locale for its script, 137 // e.g. keep zh_Hans_HK because zh_Hans is a default locale. 138 parser.set(localeID); 139 if (parser.getRegion().length() > 0 && !localeID.equals("pt_PT")) { 140 System.out.println("Skipping regional locale " + localeID); 141 continue; 142 } 143 144 // Save casing information about the locale. 145 CLDRFile file = cldrFactory.make(localeID, true); 146 UnicodeSet examplars = file.getExemplarSet("", WinningChoice.NORMAL); 147 localeUsesCasing.put(localeID, examplars.containsSome(allCaps)); 148 createCasingXml(localeID, CheckConsistentCasing.getSamples(file)); 149 } 150 return localeUsesCasing; 151 } 152 153 /** 154 * Creates a CSV summary of casing information over all locales for verification. 155 * 156 * @param outputFile 157 */ createCasingSummary(String outputFile, Map<String, Boolean> localeUsesCasing)158 private void createCasingSummary(String outputFile, Map<String, Boolean> localeUsesCasing) { 159 PrintWriter out; 160 try { 161 out = new PrintWriter(outputFile); 162 } catch (IOException e) { 163 e.printStackTrace(); 164 return; 165 } 166 167 // Header 168 out.print(","); 169 for (Category category : Category.values()) { 170 out.print("," + category.toString().replace('_', '-')); 171 } 172 out.println(); 173 out.print("Locale ID,Case"); 174 for (int i = 0; i < Category.values().length; i++) { 175 out.print("," + i); 176 } 177 out.println(); 178 179 Set<String> locales = casing.keySet(); 180 for (String localeID : locales) { 181 // Write casing information about the locale to file. 182 out.print(localeID); 183 out.print(","); 184 out.print(localeUsesCasing.get(localeID) ? "Y" : "N"); 185 Map<Category, CasingTypeAndErrFlag> types = casing.get(localeID); 186 for (Category category : Category.values()) { 187 CasingTypeAndErrFlag value = types.get(category); 188 out.print("," + value == null ? null : value.type().toString().charAt(0)); 189 } 190 out.println(); 191 out.flush(); 192 } 193 out.close(); 194 } 195 196 /** 197 * Writes casing information for the specified locale to XML format. 198 */ createCasingXml(String localeID, Map<Category, CasingType> localeCasing)199 private void createCasingXml(String localeID, Map<Category, CasingType> localeCasing) { 200 // Load any existing overrides over casing info. 201 CasingHandler handler = loadFromXml(localeID); 202 Map<Category, CasingType> overrides = handler == null ? new EnumMap<Category, CasingType>(Category.class) : handler.getOverrides(); 203 localeCasing.putAll(overrides); 204 205 XMLSource source = new SimpleXMLSource(localeID); 206 for (Category category : Category.values()) { 207 if (category == Category.NOT_USED) continue; 208 CasingType type = localeCasing.get(category); 209 if (overrides.containsKey(category)) { 210 String path = MessageFormat.format("//ldml/metadata/casingData/casingItem[@type=\"{0}\"][@override=\"true\"]", category); 211 source.putValueAtPath(path, type.toString()); 212 } else if (type != CasingType.other) { 213 String path = "//ldml/metadata/casingData/casingItem[@type=\"" + category + "\"]"; 214 source.putValueAtPath(path, type.toString()); 215 } 216 } 217 CLDRFile cldrFile = new CLDRFile(source); 218 File casingFile = new File(CLDRPaths.GEN_DIRECTORY + "/casing", localeID + ".xml"); 219 220 try { 221 PrintWriter out = new PrintWriter(casingFile); 222 cldrFile.write(out); 223 out.close(); 224 } catch (IOException e) { 225 e.printStackTrace(); 226 } 227 } 228 229 /** 230 * Generates all the casing information and writes it to XML. 231 * A CSV summary of casing information is written to file if a filename argument is provided. 232 * 233 * @param args 234 */ main(String[] args)235 public static void main(String[] args) { 236 CasingInfo casingInfo = new CasingInfo(); 237 options.parse(args, true); 238 Map<String, Boolean> localeUsesCasing = casingInfo.generateCasingInformation(options.get("locales").getValue()); 239 if (options.get("summary").doesOccur()) { 240 casingInfo.createCasingSummary(args[0], localeUsesCasing); 241 } 242 } 243 244 /** 245 * XML handler for parsing casing files. 246 */ 247 private class CasingHandler extends XMLFileReader.SimpleHandler { 248 private Pattern localePattern = PatternCache.get("//ldml/identity/language\\[@type=\"(\\w+)\"\\]"); 249 private String localeID; 250 private Map<Category, CasingTypeAndErrFlag> caseMap = new EnumMap<Category, CasingTypeAndErrFlag>(Category.class); 251 private Map<Category, CasingType> overrideMap = new EnumMap<Category, CasingType>(Category.class); 252 253 @Override handlePathValue(String path, String value)254 public void handlePathValue(String path, String value) { 255 // Parse casing info. 256 if (path.contains("casingItem")) { 257 XPathParts parts = new XPathParts().set(path); 258 Category category = Category.valueOf(parts.getAttributeValue(-1, "type").replace('-', '_')); 259 CasingType casingType = CasingType.valueOf(value); 260 boolean errFlag = Boolean.parseBoolean(parts.getAttributeValue(-1, "forceError")); 261 for (CasingTypeAndErrFlag typeAndFlag : CasingTypeAndErrFlag.values()) { 262 if (casingType == typeAndFlag.type() && errFlag == typeAndFlag.flag()) { 263 caseMap.put(category, typeAndFlag); 264 break; 265 } 266 } 267 if (Boolean.valueOf(parts.getAttributeValue(-1, "override"))) { 268 overrideMap.put(category, casingType); 269 } 270 } else { 271 // Parse the locale that the casing is for. 272 Matcher matcher = localePattern.matcher(path); 273 if (matcher.matches()) { 274 localeID = matcher.group(1); 275 } 276 } 277 } 278 addParsedResult(Map<String, Map<Category, CasingTypeAndErrFlag>> map)279 public void addParsedResult(Map<String, Map<Category, CasingTypeAndErrFlag>> map) { 280 map.put(localeID, caseMap); 281 } 282 getOverrides()283 public Map<Category, CasingType> getOverrides() { 284 return overrideMap; 285 } 286 } 287 } 288