• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.io.DataOutputStream;
4 import java.io.File;
5 import java.io.FileOutputStream;
6 import java.io.IOException;
7 import java.io.PrintWriter;
8 import java.util.ArrayList;
9 import java.util.Collections;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Set;
16 import java.util.TreeMap;
17 import java.util.TreeSet;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 
21 import org.unicode.cldr.draft.FileUtilities;
22 import org.unicode.cldr.test.DisplayAndInputProcessor;
23 import org.unicode.cldr.test.OutdatedPaths;
24 import org.unicode.cldr.tool.Option.Options;
25 import org.unicode.cldr.util.CLDRFile;
26 import org.unicode.cldr.util.CLDRPaths;
27 import org.unicode.cldr.util.CldrUtility;
28 import org.unicode.cldr.util.Factory;
29 import org.unicode.cldr.util.LanguageTagParser;
30 import org.unicode.cldr.util.Pair;
31 import org.unicode.cldr.util.PathUtilities;
32 import org.unicode.cldr.util.PatternCache;
33 import org.unicode.cldr.util.SimpleFactory;
34 import org.unicode.cldr.util.StringId;
35 
36 import com.google.common.base.Joiner;
37 import com.google.common.base.Objects;
38 import com.ibm.icu.impl.Relation;
39 import com.ibm.icu.impl.Row;
40 import com.ibm.icu.impl.Row.R3;
41 import com.ibm.icu.lang.CharSequences;
42 import com.ibm.icu.util.ICUException;
43 import com.ibm.icu.util.VersionInfo;
44 
45 public class GenerateBirth {
46     private static boolean DEBUG = false;
47 
48     static CldrVersion[] VERSIONS;
49 
50     static Factory[] factories;
51 
52     final static Options myOptions = new Options()
53         .add("target", ".*", CLDRPaths.BIRTH_DATA_DIR,
54             "The target directory for building the text files that show the results.")
55         .add("log", ".*", CLDRPaths.STAGING_DIRECTORY + "births/" + CldrVersion.baseline.getVersionInfo().getVersionString(2, 4),
56             "The target directory for building the text files that show the results.")
57         .add(
58             "file",
59             ".*",
60             ".*",
61             "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering")
62         .add("previous", "Stop after writing the English previous data.")
63         .add("oldest",
64             "\\d+(\\.\\d+)?",
65             "36.0",
66             "Oldest version to go back to, eg 36.1")
67         .add("debug", "Debug");
68 
main(String[] args)69     public static void main(String[] args) throws IOException {
70         System.out.println("Run TestOutdatedPaths.java -v to see a listing of changes.");
71         myOptions.parse(args, true);
72         DEBUG = myOptions.get("debug").doesOccur();
73 
74         try {
75             CldrVersion.checkVersions(); // verify versions up to date
76         } catch (Exception e) {
77             throw new ICUException("This tool can only be run if the archive of released versions matching CldrVersion is available.", e);
78         }
79 
80         // generate the list for as far as we want to go back
81 
82         VersionInfo oldest = VersionInfo.getInstance(myOptions.get("oldest").getValue());
83         List<CldrVersion> versions = new ArrayList<>();
84         boolean foundStart = false;
85         for (CldrVersion version : CldrVersion.CLDR_VERSIONS_DESCENDING) {
86             versions.add(version);
87            if (version.getVersionInfo() == oldest) {
88                foundStart = true;
89                break;
90            }
91         }
92         if (!foundStart) {
93             throw new IllegalArgumentException("The last version is " + myOptions.get("oldest").getValue() + "; it must be in: " + Joiner.on(", ").join(CldrVersion.CLDR_VERSIONS_DESCENDING));
94         }
95         VERSIONS = versions.toArray(new CldrVersion[versions.size()]);
96 
97         // set up the CLDR Factories for each version
98         factories = new Factory[VERSIONS.length]; // hack for now; should change to list
99 
100         String filePattern = myOptions.get("file").getValue();
101 
102         ArrayList<Factory> list = new ArrayList<>();
103         for (CldrVersion version : VERSIONS) {
104             if (version == CldrVersion.unknown) {
105                 continue;
106             }
107             List<File> paths = version.getPathsForFactory();
108 
109             System.out.println(version + ", " + paths);
110             Factory aFactory = SimpleFactory.make(paths.toArray(new File[paths.size()]), filePattern);
111             list.add(aFactory);
112         }
113         list.toArray(factories);
114 
115         final String dataDirectory = myOptions.get("target").getValue();
116         File dataDir = new File(dataDirectory);
117         if (!dataDir.isDirectory()) {
118             throw new IllegalArgumentException("-t value is not directory: " + dataDir);
119         }
120 
121         // load and process English
122 
123         String logDirectory = myOptions.get("log").getValue();
124 
125         System.out.println("en");
126         Births english = new Births("en");
127         english.writeBirth(logDirectory, "en", null);
128         english.writeBirthValues(dataDirectory + "/" + OutdatedPaths.OUTDATED_ENGLISH_DATA);
129 
130         Map<Long, Pair<CldrVersion, String>> pathToPrevious = new HashMap<>();
131 
132         // Verify that the write of English worked
133 
134         OutdatedPaths.readBirthValues(dataDirectory, null, pathToPrevious);
135         for (Entry<String, R3<CldrVersion, String, String>> entry : english.pathToBirthCurrentPrevious.entrySet()) {
136             String path = entry.getKey();
137             String previous = entry.getValue().get2();
138             CldrVersion birth = entry.getValue().get0();
139             if (previous == null) {
140                 previous = OutdatedPaths.NO_VALUE;
141             }
142             long id = StringId.getId(path);
143             Pair<CldrVersion, String> readValue = pathToPrevious.get(id);
144             CldrVersion birthRead = readValue == null ? null : readValue.getFirst();
145             String previousRead = readValue == null ? null : readValue.getSecond();
146             if (!Objects.equal(previous, previousRead) || !Objects.equal(birth, birthRead)) {
147                 throw new IllegalArgumentException("path: " + path
148                     + "\tprevious: " + previous + "\tread: " + readValue
149                     + "\tbirth: " + birth + "\tread: " + birthRead);
150             }
151         }
152 
153         // Set up the binary data files for all others
154 
155         File file = new File(dataDirectory + "/" + OutdatedPaths.OUTDATED_DATA);
156         final String outputDataFile = PathUtilities.getNormalizedPathString(file);
157         TreeMap<String, Set<String>> localeToNewer = new TreeMap<>();
158 
159         System.out.println("Writing data: " + outputDataFile);
160         try (DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file))) {
161             dataOut.writeUTF(OutdatedPaths.FORMAT_KEY);
162 
163             // Load and process all the locales
164 
165             LanguageTagParser ltp = new LanguageTagParser();
166             for (String fileName : factories[0].getAvailable()) {
167                 if (fileName.equals("en")) {
168                     continue;
169                 }
170                 if (!ltp.set(fileName).getRegion().isEmpty()) {
171                     continue; // skip region locales
172                 }
173                 // TODO skip default content locales
174                 System.out.println(fileName);
175                 Births other = new Births(fileName);
176                 Set<String> newer = other.writeBirth(logDirectory, fileName, english);
177 
178                 dataOut.writeUTF(fileName);
179                 dataOut.writeInt(newer.size());
180                 for (String item : newer) {
181                     long id = StringId.getId(item);
182                     dataOut.writeLong(id);
183                     if (DEBUG) {
184                         System.out.println(id + "\t" + item);
185                     }
186                 }
187                 localeToNewer.put(fileName, newer);
188             }
189             dataOut.writeUTF("$END$");
190         }
191 
192         // Doublecheck the data
193 
194         OutdatedPaths outdatedPaths = new OutdatedPaths(dataDirectory);
195         Set<String> needPrevious = new TreeSet<>();
196         int errorCount = 0;
197         for (Entry<String, Set<String>> localeAndNewer : localeToNewer.entrySet()) {
198             String locale = localeAndNewer.getKey();
199             System.out.println("Checking " + locale);
200             Set<String> newer = localeAndNewer.getValue();
201             if (newer.size() != outdatedPaths.countOutdated(locale)) {
202                 throw new IllegalArgumentException("broken: " + locale);
203             }
204             for (String xpath : newer) {
205                 boolean isOutdated = outdatedPaths.isRawOutdated(locale, xpath);
206                 if (!isOutdated) {
207                     System.out.println("Error, broken locale: " + locale + "\t" + StringId.getId(xpath) + "\t" + xpath);
208                     ++errorCount;
209                 }
210                 if (outdatedPaths.isSkipped(xpath)) {
211                     continue;
212                 }
213                 String previous = outdatedPaths.getPreviousEnglish(xpath);
214                 if (previous.isEmpty() != english.emptyPrevious.contains(xpath)) {
215                     System.out.println("previous.isEmpty() != original " + locale + "\t" + StringId.getId(xpath) + "\t"
216                         + xpath);
217                     needPrevious.add(xpath);
218                     ++errorCount;
219                 }
220             }
221         }
222         if (errorCount != 0) {
223             throw new IllegalArgumentException("Done, but " + errorCount + " errors");
224         } else {
225             System.out.println("Done, no errors");
226         }
227     }
228 
229     static class Births {
230         private static final boolean USE_RESOLVED = false;
231         final Relation<CldrVersion, String> birthToPaths;
232         final Map<String, Row.R3<CldrVersion, String, String>> pathToBirthCurrentPrevious;
233         final String locale;
234         static final Pattern TYPE = PatternCache.get("\\[@type=\"([^\"]*)\"");
235         final Matcher typeMatcher = TYPE.matcher("");
236         Set<String> emptyPrevious = new HashSet<>();
237 
Births(String file)238         Births(String file) {
239             locale = file;
240 
241             CLDRFile[] files = new CLDRFile[factories.length];
242             DisplayAndInputProcessor[] processors = new DisplayAndInputProcessor[factories.length];
243 
244             for (int i = 0; i < factories.length; ++i) {
245                 try {
246                     files[i] = factories[i].make(file, USE_RESOLVED);
247                     processors[i] = new DisplayAndInputProcessor(files[i], false);
248                 } catch (Exception e) {
249                     // stop when we fail to find
250                     System.out.println("Stopped at " + file + ", " + CldrVersion.CLDR_VERSIONS_DESCENDING.get(i));
251                     //e.printStackTrace();
252                     break;
253                 }
254             }
255             birthToPaths = Relation.of(new TreeMap<CldrVersion, Set<String>>(), TreeSet.class);
256             pathToBirthCurrentPrevious = new HashMap<>();
257             for (String xpath : files[0]) {
258                 xpath = xpath.intern();
259                 if (xpath.contains("[@type=\"ar\"]")) {
260                     int debug = 0;
261                 }
262                 String base = getProcessedStringValue(0, xpath, files, processors);
263 
264                 String previousValue = null;
265                 int i;
266                 CLDRFile lastFile = files[0];
267                 for (i = 1; i < files.length && files[i] != null; ++i) {
268                     String previous = getProcessedStringValue(i, xpath, files, processors);
269                     if (previous == null) {
270                         previous = OutdatedPaths.NO_VALUE; // fixNullPrevious(xpath);
271                     }
272                     if (!CharSequences.equals(base, previous)) {
273                         if (previous != null) {
274                             previousValue = previous;
275                         }
276                         break;
277                     }
278                     lastFile = files[i];
279                 }
280                 CldrVersion version = CldrVersion.from(lastFile.getDtdVersionInfo());
281                 birthToPaths.put(version, xpath);
282                 pathToBirthCurrentPrevious.put(xpath, Row.of(version, base, previousValue));
283             }
284         }
285 
getProcessedStringValue(int fileNumber, String xpath, CLDRFile[] files, DisplayAndInputProcessor[] processors)286         public String getProcessedStringValue(int fileNumber, String xpath, CLDRFile[] files, DisplayAndInputProcessor[] processors) {
287             String base = files[fileNumber].getStringValue(xpath);
288             if (base != null) {
289                 base = processors[fileNumber].processInput(xpath, base, null);
290             }
291             return base;
292         }
293 
fixNullPrevious(String xpath)294         private String fixNullPrevious(String xpath) {
295             if (typeMatcher.reset(xpath).find()) {
296                 String type = typeMatcher.group(1);
297                 if (xpath.contains("metazone")) {
298                     return type.replace("_", " ");
299                 } else if (xpath.contains("zone")) {
300                     String[] splits = type.split("/");
301                     return splits[splits.length - 1].replace("_", " ");
302                 }
303                 return type;
304             }
305             return null;
306         }
307 
writeBirthValues(String file)308         public void writeBirthValues(String file) throws IOException {
309             try (DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file))) {
310                 dataOut.writeUTF(OutdatedPaths.FORMAT_KEY);
311                 System.out.println("Writing data: " + PathUtilities.getNormalizedPathString(file));
312                 dataOut.writeInt(pathToBirthCurrentPrevious.size());
313 
314                 // Load and process all the locales
315 
316                 //TreeMap<String, Set<String>> localeToNewer = new TreeMap<String, Set<String>>();
317                 for (Entry<String, R3<CldrVersion, String, String>> entry : pathToBirthCurrentPrevious.entrySet()) {
318                     String path = entry.getKey();
319                     R3<CldrVersion, String, String> birthCurrentPrevious = entry.getValue();
320                     CldrVersion birth = birthCurrentPrevious.get0();
321                     String current = birthCurrentPrevious.get1();
322                     String previous = birthCurrentPrevious.get2();
323                     long id = StringId.getId(path);
324                     dataOut.writeLong(id);
325                     final String previousString = previous == null ? OutdatedPaths.NO_VALUE : previous;
326                     dataOut.writeUTF(previousString);
327                     if (previous == null) {
328                         emptyPrevious.add(path);
329                     }
330                     dataOut.writeUTF(birth.toString());
331                     if (true) {
332                         System.out.println(id + "\t" + birth + "\t«" + current + "⇐" + previous + "»");
333                     }
334                 }
335                 dataOut.writeUTF("$END$");
336                 emptyPrevious = Collections.unmodifiableSet(emptyPrevious);
337             }
338         }
339 
writeBirth(PrintWriter out, Births onlyNewer)340         Set<String> writeBirth(PrintWriter out, Births onlyNewer) {
341 
342             out.println("Loc\tVersion\tValue\tPrevValue\tEVersion\tEValue\tEPrevValue\tPath");
343 
344             Set<String> newer = new HashSet<>();
345             HashMap<Long, String> sanityCheck = new HashMap<>();
346             CldrVersion onlyNewerVersion = null;
347             String otherValue = "n/a";
348             String olderOtherValue = "n/a";
349             for (Entry<CldrVersion, Set<String>> entry2 : birthToPaths.keyValuesSet()) {
350                 CldrVersion version = entry2.getKey();
351                 for (String xpath : entry2.getValue()) {
352                     long id = StringId.getId(xpath);
353                     String old = sanityCheck.get(id);
354                     if (old != null) {
355                         throw new IllegalArgumentException("Path Collision " + xpath + ", old:" + old + ", id: " + id);
356                     } else {
357                         sanityCheck.put(id, xpath);
358                     }
359                     R3<CldrVersion, String, String> info = pathToBirthCurrentPrevious.get(xpath);
360                     if (onlyNewer != null) {
361 
362                         R3<CldrVersion, String, String> otherInfo = onlyNewer.pathToBirthCurrentPrevious.get(xpath);
363                         if (otherInfo == null) {
364                             continue;
365                         }
366                         // skip if not older than "comparison version"
367                         onlyNewerVersion = otherInfo.get0();
368                         if (!version.isOlderThan(onlyNewerVersion)) {
369                             continue;
370                         }
371                         otherValue = fixNull(otherInfo.get1());
372                         olderOtherValue = fixNull(otherInfo.get2());
373                         newer.add(xpath);
374                     }
375                     String value = fixNull(info.get1());
376                     String olderValue = fixNull(info.get2());
377 
378                     out.println(locale
379                         + "\t" + version
380                         + "\t" + value
381                         + "\t" + olderValue
382                         + "\t" + CldrUtility.ifNull(onlyNewerVersion, "n/a")
383                         + "\t" + otherValue
384                         + "\t" + olderOtherValue
385                         + "\t" + xpath);
386 
387                 }
388             }
389             return newer;
390         }
391 
fixNull(String value)392         private String fixNull(String value) {
393             if (value == null) {
394                 value = OutdatedPaths.NO_VALUE;
395             }
396             return value;
397         }
398 
writeBirth(String directory, String filename, Births onlyNewer)399         Set<String> writeBirth(String directory, String filename, Births onlyNewer) throws IOException {
400             try (PrintWriter out = FileUtilities.openUTF8Writer(directory, filename + ".txt")) {
401                 Set<String> newer = writeBirth(out, onlyNewer);
402                 return newer;
403             }
404         }
405     }
406 }
407