1 package org.unicode.cldr.tool; 2 3 import java.io.DataOutputStream; 4 import java.io.File; 5 import java.io.FileOutputStream; 6 import java.io.IOException; 7 import java.io.PrintWriter; 8 import java.util.ArrayList; 9 import java.util.Collections; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.List; 13 import java.util.Map; 14 import java.util.Map.Entry; 15 import java.util.Set; 16 import java.util.TreeMap; 17 import java.util.TreeSet; 18 import java.util.regex.Matcher; 19 import java.util.regex.Pattern; 20 21 import org.unicode.cldr.draft.FileUtilities; 22 import org.unicode.cldr.test.OutdatedPaths; 23 import org.unicode.cldr.tool.Option.Options; 24 import org.unicode.cldr.util.CLDRConfig; 25 import org.unicode.cldr.util.CLDRFile; 26 import org.unicode.cldr.util.CLDRPaths; 27 import org.unicode.cldr.util.CldrUtility; 28 import org.unicode.cldr.util.Factory; 29 import org.unicode.cldr.util.LanguageTagParser; 30 import org.unicode.cldr.util.Pair; 31 import org.unicode.cldr.util.PathUtilities; 32 import org.unicode.cldr.util.PatternCache; 33 import org.unicode.cldr.util.SimpleFactory; 34 import org.unicode.cldr.util.StringId; 35 36 import com.google.common.base.Objects; 37 import com.ibm.icu.impl.Relation; 38 import com.ibm.icu.impl.Row; 39 import com.ibm.icu.impl.Row.R3; 40 import com.ibm.icu.lang.CharSequences; 41 import com.ibm.icu.util.ICUException; 42 43 public class GenerateBirth { 44 private static boolean DEBUG = false; 45 46 private static final List<CldrVersion> VERSIONS_WITH_TRUNK_DESCENDING = CldrVersion.CLDR_VERSIONS_DESCENDING; 47 48 static final CldrVersion[] VERSIONS = VERSIONS_WITH_TRUNK_DESCENDING.toArray( 49 new CldrVersion[VERSIONS_WITH_TRUNK_DESCENDING.size()]); // hack for now; should change to list 50 51 static final Factory[] factories = new Factory[VERSIONS.length - 1]; // hack for now; should change to list 52 53 final static Options myOptions = new Options() 54 .add("target", ".*", CLDRPaths.BIRTH_DATA_DIR, 55 "The target directory for building the text files that show the results.") 56 .add("log", ".*", CLDRPaths.AUX_DIRECTORY + "births/" + CldrVersion.trunk.getVersionInfo().getVersionString(2, 4), 57 "The target directory for building the text files that show the results.") 58 .add( 59 "file", 60 ".*", 61 ".*", 62 "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering") 63 .add("previous", "Stop after writing the English previous data.") 64 .add("debug", "Debug"); 65 main(String[] args)66 public static void main(String[] args) throws IOException { 67 System.out.println("Run TestOutdatedPaths.java -v to see a listing of changes."); 68 myOptions.parse(args, true); 69 try { 70 CldrVersion.checkVersions(); // verify versions up to date 71 } catch (Exception e) { 72 throw new ICUException("This tool can only be run if the archive of released versions matching CldrVersion is available.", e); 73 } 74 75 // set up the CLDR Factories 76 77 DEBUG = myOptions.get("debug").doesOccur(); 78 79 final CLDRConfig config = CLDRConfig.getInstance(); 80 81 String filePattern = myOptions.get("file").getValue(); 82 83 ArrayList<Factory> list = new ArrayList<>(); 84 for (CldrVersion version : VERSIONS) { 85 if (version == CldrVersion.unknown) { 86 continue; 87 } 88 List<File> paths = version.getPathsForFactory(); 89 // String base = version.getBaseDirectory(); 90 // File[] paths = version.compareTo(CldrVersion.v27_0) > 0 ? // warning, order is reversed 91 // new File[] { new File(base + "common/main/") } : 92 // new File[] { new File(base + "common/main/"), new File(base + "common/annotations/") }; 93 94 System.out.println(version + ", " + paths); 95 Factory aFactory = SimpleFactory.make(paths.toArray(new File[paths.size()]), filePattern); 96 list.add(aFactory); 97 } 98 list.toArray(factories); 99 100 final String dataDirectory = myOptions.get("target").getValue(); 101 File dataDir = new File(dataDirectory); 102 if (!dataDir.isDirectory()) { 103 throw new IllegalArgumentException("-t value is not directory: " + dataDir); 104 } 105 106 // load and process English 107 108 String logDirectory = myOptions.get("log").getValue(); 109 110 System.out.println("en"); 111 Births english = new Births("en"); 112 english.writeBirth(logDirectory, "en", null); 113 english.writeBirthValues(dataDirectory + "/" + OutdatedPaths.OUTDATED_ENGLISH_DATA); 114 115 Map<Long, Pair<CldrVersion, String>> pathToPrevious = new HashMap<>(); 116 117 // Verify that the write of English worked 118 119 OutdatedPaths.readBirthValues(dataDirectory, null, pathToPrevious); 120 for (Entry<String, R3<CldrVersion, String, String>> entry : english.pathToBirthCurrentPrevious.entrySet()) { 121 String path = entry.getKey(); 122 String previous = entry.getValue().get2(); 123 CldrVersion birth = entry.getValue().get0(); 124 if (previous == null) { 125 previous = OutdatedPaths.NO_VALUE; 126 } 127 long id = StringId.getId(path); 128 Pair<CldrVersion, String> readValue = pathToPrevious.get(id); 129 CldrVersion birthRead = readValue == null ? null : readValue.getFirst(); 130 String previousRead = readValue == null ? null : readValue.getSecond(); 131 if (!Objects.equal(previous, previousRead) || !Objects.equal(birth, birthRead)) { 132 throw new IllegalArgumentException("path: " + path 133 + "\tprevious: " + previous + "\tread: " + readValue 134 + "\tbirth: " + birth + "\tread: " + birthRead); 135 } 136 } 137 138 // Set up the binary data files for all others 139 140 File file = new File(dataDirectory + "/" + OutdatedPaths.OUTDATED_DATA); 141 final String outputDataFile = PathUtilities.getNormalizedPathString(file); 142 System.out.println("Writing data: " + outputDataFile); 143 DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file)); 144 dataOut.writeUTF(OutdatedPaths.FORMAT_KEY); 145 146 // Load and process all the locales 147 148 TreeMap<String, Set<String>> localeToNewer = new TreeMap<>(); 149 LanguageTagParser ltp = new LanguageTagParser(); 150 for (String fileName : factories[0].getAvailable()) { 151 if (fileName.equals("en")) { 152 continue; 153 } 154 if (!ltp.set(fileName).getRegion().isEmpty()) { 155 continue; // skip region locales 156 } 157 // TODO skip default content locales 158 System.out.println(fileName); 159 Births other = new Births(fileName); 160 Set<String> newer = other.writeBirth(logDirectory, fileName, english); 161 162 dataOut.writeUTF(fileName); 163 dataOut.writeInt(newer.size()); 164 for (String item : newer) { 165 long id = StringId.getId(item); 166 dataOut.writeLong(id); 167 if (DEBUG) { 168 System.out.println(id + "\t" + item); 169 } 170 } 171 localeToNewer.put(fileName, newer); 172 } 173 dataOut.writeUTF("$END$"); 174 dataOut.close(); 175 176 // Doublecheck the data 177 178 OutdatedPaths outdatedPaths = new OutdatedPaths(dataDirectory); 179 Set<String> needPrevious = new TreeSet<>(); 180 int errorCount = 0; 181 for (Entry<String, Set<String>> localeAndNewer : localeToNewer.entrySet()) { 182 String locale = localeAndNewer.getKey(); 183 System.out.println("Checking " + locale); 184 Set<String> newer = localeAndNewer.getValue(); 185 if (newer.size() != outdatedPaths.countOutdated(locale)) { 186 throw new IllegalArgumentException("broken: " + locale); 187 } 188 for (String xpath : newer) { 189 boolean isOutdated = outdatedPaths.isRawOutdated(locale, xpath); 190 if (!isOutdated) { 191 System.out.println("Error, broken locale: " + locale + "\t" + StringId.getId(xpath) + "\t" + xpath); 192 ++errorCount; 193 } 194 if (outdatedPaths.isSkipped(xpath)) { 195 continue; 196 } 197 String previous = outdatedPaths.getPreviousEnglish(xpath); 198 if (previous.isEmpty() != english.emptyPrevious.contains(xpath)) { 199 System.out.println("previous.isEmpty() != original " + locale + "\t" + StringId.getId(xpath) + "\t" 200 + xpath); 201 needPrevious.add(xpath); 202 ++errorCount; 203 } 204 } 205 } 206 if (errorCount != 0) { 207 throw new IllegalArgumentException("Done, but " + errorCount + " errors"); 208 } else { 209 System.out.println("Done, no errors"); 210 } 211 } 212 213 static class Births { 214 final Relation<CldrVersion, String> birthToPaths; 215 final Map<String, Row.R3<CldrVersion, String, String>> pathToBirthCurrentPrevious; 216 final String locale; 217 static final Pattern TYPE = PatternCache.get("\\[@type=\"([^\"]*)\""); 218 final Matcher typeMatcher = TYPE.matcher(""); 219 Set<String> emptyPrevious = new HashSet<>(); 220 Births(String file)221 Births(String file) { 222 locale = file; 223 CLDRFile[] files = new CLDRFile[factories.length]; 224 for (int i = 0; i < factories.length; ++i) { 225 try { 226 files[i] = factories[i].make(file, false); 227 } catch (Exception e) { 228 // stop when we fail to find 229 System.out.println("Stopped at " + file + ", " + CldrVersion.CLDR_VERSIONS_DESCENDING.get(i)); 230 //e.printStackTrace(); 231 break; 232 } 233 } 234 birthToPaths = Relation.of(new TreeMap<CldrVersion, Set<String>>(), TreeSet.class); 235 pathToBirthCurrentPrevious = new HashMap<>(); 236 for (String xpath : files[0]) { 237 xpath = xpath.intern(); 238 if (xpath.contains("[@type=\"ar\"]")) { 239 int debug = 0; 240 } 241 String base = files[0].getStringValue(xpath); 242 String previousValue = null; 243 int i; 244 CLDRFile lastFile = files[0]; 245 for (i = 1; i < files.length && files[i] != null; ++i) { 246 String previous = files[i].getStringValue(xpath); 247 if (previous == null) { 248 previous = OutdatedPaths.NO_VALUE; // fixNullPrevious(xpath); 249 } 250 if (!CharSequences.equals(base, previous)) { 251 if (previous != null) { 252 previousValue = previous; 253 } 254 break; 255 } 256 lastFile = files[i]; 257 } 258 CldrVersion version = CldrVersion.from(lastFile.getDtdVersionInfo()); 259 birthToPaths.put(version, xpath); 260 pathToBirthCurrentPrevious.put(xpath, Row.of(version, base, previousValue)); 261 } 262 } 263 fixNullPrevious(String xpath)264 private String fixNullPrevious(String xpath) { 265 if (typeMatcher.reset(xpath).find()) { 266 String type = typeMatcher.group(1); 267 if (xpath.contains("metazone")) { 268 return type.replace("_", " "); 269 } else if (xpath.contains("zone")) { 270 String[] splits = type.split("/"); 271 return splits[splits.length - 1].replace("_", " "); 272 } 273 return type; 274 } 275 return null; 276 } 277 writeBirthValues(String file)278 public void writeBirthValues(String file) throws IOException { 279 DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file)); 280 dataOut.writeUTF(OutdatedPaths.FORMAT_KEY); 281 System.out.println("Writing data: " + PathUtilities.getNormalizedPathString(file)); 282 dataOut.writeInt(pathToBirthCurrentPrevious.size()); 283 284 // Load and process all the locales 285 286 //TreeMap<String, Set<String>> localeToNewer = new TreeMap<String, Set<String>>(); 287 for (Entry<String, R3<CldrVersion, String, String>> entry : pathToBirthCurrentPrevious.entrySet()) { 288 String path = entry.getKey(); 289 R3<CldrVersion, String, String> birthCurrentPrevious = entry.getValue(); 290 CldrVersion birth = birthCurrentPrevious.get0(); 291 String current = birthCurrentPrevious.get1(); 292 String previous = birthCurrentPrevious.get2(); 293 long id = StringId.getId(path); 294 dataOut.writeLong(id); 295 final String previousString = previous == null ? OutdatedPaths.NO_VALUE : previous; 296 dataOut.writeUTF(previousString); 297 if (previous == null) { 298 emptyPrevious.add(path); 299 } 300 dataOut.writeUTF(birth.toString()); 301 if (true) { 302 System.out.println(id + "\t" + birth + "\t«" + current + "⇐" + previous + "»"); 303 } 304 } 305 dataOut.writeUTF("$END$"); 306 dataOut.close(); 307 emptyPrevious = Collections.unmodifiableSet(emptyPrevious); 308 } 309 writeBirth(PrintWriter out, Births onlyNewer)310 Set<String> writeBirth(PrintWriter out, Births onlyNewer) { 311 312 out.println("Loc\tVersion\tValue\tPrevValue\tEVersion\tEValue\tEPrevValue\tPath"); 313 314 Set<String> newer = new HashSet<>(); 315 HashMap<Long, String> sanityCheck = new HashMap<>(); 316 CldrVersion onlyNewerVersion = null; 317 String otherValue = "n/a"; 318 String olderOtherValue = "n/a"; 319 for (Entry<CldrVersion, Set<String>> entry2 : birthToPaths.keyValuesSet()) { 320 CldrVersion version = entry2.getKey(); 321 for (String xpath : entry2.getValue()) { 322 long id = StringId.getId(xpath); 323 String old = sanityCheck.get(id); 324 if (old != null) { 325 throw new IllegalArgumentException("Path Collision " + xpath + ", old:" + old + ", id: " + id); 326 } else { 327 sanityCheck.put(id, xpath); 328 } 329 R3<CldrVersion, String, String> info = pathToBirthCurrentPrevious.get(xpath); 330 if (onlyNewer != null) { 331 332 R3<CldrVersion, String, String> otherInfo = onlyNewer.pathToBirthCurrentPrevious.get(xpath); 333 if (otherInfo == null) { 334 continue; 335 } 336 // skip if not older than "comparison version" 337 onlyNewerVersion = otherInfo.get0(); 338 if (!version.isOlderThan(onlyNewerVersion)) { 339 continue; 340 } 341 otherValue = fixNull(otherInfo.get1()); 342 olderOtherValue = fixNull(otherInfo.get2()); 343 newer.add(xpath); 344 } 345 String value = fixNull(info.get1()); 346 String olderValue = fixNull(info.get2()); 347 348 out.println(locale 349 + "\t" + version 350 + "\t" + value 351 + "\t" + olderValue 352 + "\t" + CldrUtility.ifNull(onlyNewerVersion, "n/a") 353 + "\t" + otherValue 354 + "\t" + olderOtherValue 355 + "\t" + xpath); 356 357 } 358 } 359 return newer; 360 } 361 fixNull(String value)362 private String fixNull(String value) { 363 if (value == null) { 364 value = OutdatedPaths.NO_VALUE; 365 } 366 return value; 367 } 368 writeBirth(String directory, String filename, Births onlyNewer)369 Set<String> writeBirth(String directory, String filename, Births onlyNewer) throws IOException { 370 PrintWriter out = FileUtilities.openUTF8Writer(directory, filename + ".txt"); 371 Set<String> newer = writeBirth(out, onlyNewer); 372 out.close(); 373 return newer; 374 } 375 } 376 } 377