1 package org.unicode.cldr.tool; 2 3 import java.io.DataOutputStream; 4 import java.io.File; 5 import java.io.FileOutputStream; 6 import java.io.IOException; 7 import java.io.PrintWriter; 8 import java.util.ArrayList; 9 import java.util.Arrays; 10 import java.util.Collections; 11 import java.util.HashMap; 12 import java.util.HashSet; 13 import java.util.Map; 14 import java.util.Map.Entry; 15 import java.util.Set; 16 import java.util.TreeMap; 17 import java.util.TreeSet; 18 import java.util.regex.Matcher; 19 import java.util.regex.Pattern; 20 21 import org.unicode.cldr.draft.FileUtilities; 22 import org.unicode.cldr.test.OutdatedPaths; 23 import org.unicode.cldr.tool.Option.Options; 24 import org.unicode.cldr.util.CLDRConfig; 25 import org.unicode.cldr.util.CLDRFile; 26 import org.unicode.cldr.util.CLDRPaths; 27 import org.unicode.cldr.util.Factory; 28 import org.unicode.cldr.util.LanguageTagParser; 29 import org.unicode.cldr.util.PatternCache; 30 import org.unicode.cldr.util.SimpleFactory; 31 import org.unicode.cldr.util.StringId; 32 33 import com.ibm.icu.impl.Relation; 34 import com.ibm.icu.impl.Row; 35 import com.ibm.icu.impl.Row.R3; 36 import com.ibm.icu.lang.CharSequences; 37 38 public class GenerateBirth { 39 private static boolean DEBUG = false; 40 41 public enum Versions { 42 trunk, v31_0, v30_0, v29_0, v28_0, v27_0, v26_0, v25_0, v24_0, v23_1, v22_1, v21_0, v2_0_1, v1_9_1, v1_8_1, v1_7_2, v1_6_1, v1_5_1, v1_4_1, v1_3_0, v1_2_0, v1_1_1; toString()43 public String toString() { 44 return this == Versions.trunk ? name() : name().substring(1).replace('_', '.'); 45 }; 46 } 47 48 static final Versions[] VERSIONS = Versions.values(); 49 static final Factory[] factories = new Factory[VERSIONS.length]; 50 51 final static Options myOptions = new Options() 52 .add("target", ".*", CLDRPaths.BIRTH_DATA_DIR, 53 "The target directory for building the text files that show the results.") 54 .add("log", ".*", CLDRPaths.TMP_DIRECTORY + "dropbox/births/", 55 "The target directory for building the text files that show the results.") 56 .add( 57 "file", 58 ".*", 59 ".*", 60 "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering") 61 .add("previous", "Stop after writing the English previous data.") 62 .add("debug", "Debug"); 63 main(String[] args)64 public static void main(String[] args) throws IOException { 65 myOptions.parse(args, true); 66 67 // set up the CLDR Factories 68 69 DEBUG = myOptions.get("debug").doesOccur(); 70 71 final CLDRConfig config = CLDRConfig.getInstance(); 72 73 String filePattern = myOptions.get("file").getValue(); 74 75 ArrayList<Factory> list = new ArrayList<Factory>(); 76 for (Versions version : VERSIONS) { 77 String base = version == Versions.trunk 78 ? CLDRPaths.BASE_DIRECTORY 79 : CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + version + "/"; 80 File[] paths = version.compareTo(Versions.v27_0) > 0 // warning, order is reversed 81 ? new File[] { new File(base + "common/main/") } 82 : new File[] { new File(base + "common/main/"), new File(base + "common/annotations/") }; 83 System.out.println(version + ", " + Arrays.asList(paths)); 84 Factory aFactory = SimpleFactory.make(paths, filePattern); 85 list.add(aFactory); 86 } 87 list.toArray(factories); 88 89 final String dataDirectory = myOptions.get("target").getValue(); 90 91 // load and process English 92 93 String outputDirectory = myOptions.get("log").getValue(); 94 95 System.out.println("en"); 96 Births english = new Births("en"); 97 english.writeBirth(outputDirectory, "en", null); 98 english.writeBirthValues(dataDirectory + "/" + OutdatedPaths.OUTDATED_ENGLISH_DATA); 99 100 // if (!myOptions.get("file").doesOccur()) { 101 // OutdatedPaths outdatedPaths = new OutdatedPaths(dataDirectory); 102 // 103 // return; 104 // } 105 // Set up the binary data file 106 107 File file = new File(dataDirectory + "/" + OutdatedPaths.OUTDATED_DATA); 108 final String outputDataFile = file.getCanonicalPath(); 109 System.out.println("Writing data: " + outputDataFile); 110 DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file)); 111 112 // Load and process all the locales 113 114 TreeMap<String, Set<String>> localeToNewer = new TreeMap<String, Set<String>>(); 115 LanguageTagParser ltp = new LanguageTagParser(); 116 for (String fileName : factories[0].getAvailable()) { 117 if (fileName.equals("en")) { 118 continue; 119 } 120 if (!ltp.set(fileName).getRegion().isEmpty()) { 121 continue; // skip region locales 122 } 123 // TODO skip default content locales 124 System.out.println(fileName); 125 Births other = new Births(fileName); 126 Set<String> newer = other.writeBirth(outputDirectory, fileName, english); 127 128 dataOut.writeUTF(fileName); 129 dataOut.writeInt(newer.size()); 130 for (String item : newer) { 131 long id = StringId.getId(item); 132 dataOut.writeLong(id); 133 if (DEBUG) { 134 System.out.println(id + "\t" + item); 135 } 136 } 137 localeToNewer.put(fileName, newer); 138 } 139 dataOut.writeUTF("$END$"); 140 dataOut.close(); 141 142 // Doublecheck the data 143 144 OutdatedPaths outdatedPaths = new OutdatedPaths(dataDirectory); 145 Set<String> needPrevious = new TreeSet<String>(); 146 int errorCount = 0; 147 for (Entry<String, Set<String>> localeAndNewer : localeToNewer.entrySet()) { 148 String locale = localeAndNewer.getKey(); 149 System.out.println("Checking " + locale); 150 Set<String> newer = localeAndNewer.getValue(); 151 if (newer.size() != outdatedPaths.countOutdated(locale)) { 152 throw new IllegalArgumentException("broken: " + locale); 153 } 154 for (String xpath : newer) { 155 boolean isOutdated = outdatedPaths.isRawOutdated(locale, xpath); 156 if (!isOutdated) { 157 System.out.println("Error, broken locale: " + locale + "\t" + StringId.getId(xpath) + "\t" + xpath); 158 ++errorCount; 159 } 160 if (outdatedPaths.isSkipped(xpath)) { 161 continue; 162 } 163 String previous = outdatedPaths.getPreviousEnglish(xpath); 164 if (previous.isEmpty() != english.emptyPrevious.contains(xpath)) { 165 System.out.println("previous.isEmpty() != original" + locale + "\t" + StringId.getId(xpath) + "\t" 166 + xpath); 167 needPrevious.add(xpath); 168 ++errorCount; 169 } 170 } 171 } 172 if (errorCount != 0) { 173 throw new IllegalArgumentException("Done, but " + errorCount + " errors"); 174 } else { 175 System.out.println("Done, no errors"); 176 } 177 } 178 179 static class Births { 180 final Relation<Versions, String> birthToPaths; 181 final Map<String, Row.R3<Versions, String, String>> pathToBirthCurrentPrevious; 182 final String locale; 183 static final Pattern TYPE = PatternCache.get("\\[@type=\"([^\"]*)\""); 184 final Matcher typeMatcher = TYPE.matcher(""); 185 Set<String> emptyPrevious = new HashSet<String>(); 186 Births(String file)187 Births(String file) { 188 locale = file; 189 CLDRFile[] files = new CLDRFile[factories.length]; 190 for (int i = 0; i < factories.length; ++i) { 191 try { 192 files[i] = factories[i].make(file, false); 193 } catch (Exception e) { 194 //e.printStackTrace(); 195 break; 196 } 197 } 198 birthToPaths = Relation.of(new TreeMap<Versions, Set<String>>(), TreeSet.class); 199 pathToBirthCurrentPrevious = new HashMap<String, Row.R3<Versions, String, String>>(); 200 for (String xpath : files[0]) { 201 202 xpath = xpath.intern(); 203 String base = files[0].getStringValue(xpath); 204 String previousValue = null; 205 int i; 206 for (i = 1; i < files.length && files[i] != null; ++i) { 207 String previous = files[i].getStringValue(xpath); 208 if (previous == null) { 209 previous = fixNullPrevious(xpath); 210 } 211 if (!CharSequences.equals(base, previous)) { 212 if (previous != null) { 213 previousValue = previous; 214 } 215 break; 216 } 217 } 218 Versions version = VERSIONS[i - 1]; 219 birthToPaths.put(version, xpath); 220 pathToBirthCurrentPrevious.put(xpath, Row.of(version, base, previousValue)); 221 } 222 } 223 fixNullPrevious(String xpath)224 private String fixNullPrevious(String xpath) { 225 if (typeMatcher.reset(xpath).find()) { 226 String type = typeMatcher.group(1); 227 if (xpath.contains("metazone")) { 228 return type.replace("_", " "); 229 } else if (xpath.contains("zone")) { 230 String[] splits = type.split("/"); 231 return splits[splits.length - 1].replace("_", " "); 232 } 233 return type; 234 } 235 return null; 236 } 237 writeBirthValues(String file)238 public void writeBirthValues(String file) throws IOException { 239 DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file)); 240 System.out.println("Writing data: " + new File(file).getCanonicalPath()); 241 dataOut.writeInt(pathToBirthCurrentPrevious.size()); 242 243 // Load and process all the locales 244 245 //TreeMap<String, Set<String>> localeToNewer = new TreeMap<String, Set<String>>(); 246 for (Entry<String, R3<Versions, String, String>> entry : pathToBirthCurrentPrevious.entrySet()) { 247 String path = entry.getKey(); 248 R3<Versions, String, String> birthCurrentPrevious = entry.getValue(); 249 String previous = birthCurrentPrevious.get2(); 250 long id = StringId.getId(path); 251 dataOut.writeLong(id); 252 final String previousString = previous == null ? "" : previous; 253 dataOut.writeUTF(previousString); 254 if (previousString.isEmpty()) { 255 emptyPrevious.add(path); 256 } 257 if (DEBUG) { 258 System.out.println(id + "\t" + previous); 259 } 260 } 261 dataOut.writeUTF("$END$"); 262 dataOut.close(); 263 emptyPrevious = Collections.unmodifiableSet(emptyPrevious); 264 } 265 writeBirth(PrintWriter out, Births onlyNewer)266 Set<String> writeBirth(PrintWriter out, Births onlyNewer) { 267 Set<String> newer = new HashSet<String>(); 268 HashMap<Long, String> sanityCheck = new HashMap<Long, String>(); 269 Versions onlyNewerVersion = Versions.trunk; 270 String otherValue = ""; 271 String olderOtherValue = ""; 272 for (Entry<Versions, Set<String>> entry2 : birthToPaths.keyValuesSet()) { 273 Versions version = entry2.getKey(); 274 for (String xpath : entry2.getValue()) { 275 long id = StringId.getId(xpath); 276 String old = sanityCheck.get(id); 277 if (old != null) { 278 throw new IllegalArgumentException("Path Collision " + xpath + ", old:" + old + ", id: " + id); 279 } else { 280 sanityCheck.put(id, xpath); 281 } 282 R3<Versions, String, String> info = pathToBirthCurrentPrevious.get(xpath); 283 if (onlyNewer != null) { 284 285 R3<Versions, String, String> otherInfo = onlyNewer.pathToBirthCurrentPrevious.get(xpath); 286 if (otherInfo == null) { 287 continue; 288 } 289 // skip if older or same 290 onlyNewerVersion = otherInfo.get0(); 291 if (version.compareTo(onlyNewerVersion) <= 0) { 292 continue; 293 } 294 otherValue = fixNull(otherInfo.get1()); 295 olderOtherValue = fixNull(otherInfo.get2()); 296 newer.add(xpath); 297 } 298 String value = fixNull(info.get1()); 299 String olderValue = fixNull(info.get2()); 300 301 out.println(locale 302 + "\t" + version 303 + "\t" + value 304 + "\t" + olderValue 305 + "\t" + onlyNewerVersion 306 + "\t" + otherValue 307 + "\t" + olderOtherValue 308 + "\t" + xpath); 309 310 } 311 } 312 return newer; 313 } 314 fixNull(String value)315 private String fixNull(String value) { 316 if (value == null) { 317 value = "∅"; 318 } 319 return value; 320 } 321 writeBirth(String directory, String filename, Births onlyNewer)322 Set<String> writeBirth(String directory, String filename, Births onlyNewer) throws IOException { 323 PrintWriter out = FileUtilities.openUTF8Writer(directory, filename + ".txt"); 324 Set<String> newer = writeBirth(out, onlyNewer); 325 out.close(); 326 return newer; 327 } 328 } 329 } 330