• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.tool;
8 
9 import java.io.BufferedReader;
10 import java.io.File;
11 import java.io.IOException;
12 import java.io.PrintWriter;
13 import java.util.ArrayList;
14 import java.util.Collection;
15 import java.util.Comparator;
16 import java.util.Iterator;
17 import java.util.LinkedHashSet;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.TreeMap;
22 import java.util.TreeSet;
23 
24 import org.unicode.cldr.draft.FileUtilities;
25 import org.unicode.cldr.util.ArrayComparator;
26 import org.unicode.cldr.util.CLDRFile;
27 import org.unicode.cldr.util.CLDRFile.DraftStatus;
28 import org.unicode.cldr.util.Factory;
29 import org.unicode.cldr.util.Log;
30 import org.unicode.cldr.util.PathUtilities;
31 import org.unicode.cldr.util.SimpleFactory;
32 import org.unicode.cldr.util.XPathParts;
33 
34 import com.ibm.icu.impl.Utility;
35 import com.ibm.icu.text.Transliterator;
36 import com.ibm.icu.text.UTF16;
37 
38 /**
39  * Tool for applying modifications to the CLDR files. Use -h to see the options.
40  * <p>
 * There are some Java system properties that can be used with the program <br>
 * -DSHOW_FILES=&lt;anything&gt; shows all create/open of files.
43  */
44 public class VettingAdder {
45 
46     private Map<String, Set<String>> locale_files = new TreeMap<>();
47     private Comparator<String> scomp = new UTF16.StringComparator();
48     private Set<Object[]> conflictSet = new TreeSet<Object[]>(
49         new ArrayComparator(new Comparator[] { scomp, scomp, scomp }));
50 
VettingAdder(String sourceDirectory)51     public VettingAdder(String sourceDirectory) throws IOException {
52         addFiles(sourceDirectory);
53     }
54 
addFiles(String sourceDirectory)55     private void addFiles(String sourceDirectory) throws IOException {
56         File f = new File(sourceDirectory);
57         String normalizedPath = PathUtilities.getNormalizedPathString(f);
58         if (!f.isDirectory()) {
59             String name = f.getName();
60             if (name.startsWith("fixed-")) return; // skip
61             if (name.equals(".htaccess")) return; // skip
62             if (!name.endsWith(".xml")) {
63                 Log.logln("Wrong filename format: " + PathUtilities.getNormalizedPathString(f));
64                 return;
65             }
66             String localeName = name.substring(0, name.length() - 4);
67             Set<String> s = locale_files.get(localeName);
68             if (s == null) {
69                 locale_files.put(localeName, s = new TreeSet<>());
70             }
71             s.add(f.getParent());
72         } else {
73             String[] subnames = f.list();
74             for (int i = 0; i < subnames.length; ++i) {
75                 addFiles(normalizedPath + File.separatorChar + subnames[i]);
76             }
77         }
78     }
79 
80     static class VettingInfo {
81         private String value;
82         private String fullPath;
83         private String dir;
84 
VettingInfo(String dir, String fullPath, String value)85         public VettingInfo(String dir, String fullPath, String value) {
86             this.value = value;
87             this.fullPath = fullPath;
88             this.dir = dir;
89         }
90 
91         @Override
toString()92         public String toString() {
93             return "source: " + dir + ";\t value: <" + value + ">";
94         }
95 
compareByPathAndValue(VettingInfo other)96         public int compareByPathAndValue(VettingInfo other) {
97             int result;
98             if (0 != (result = fullPath.compareTo(other.fullPath))) return result;
99             if (0 != (result = value.compareTo(other.value))) return result;
100             return 0;
101         }
102     }
103 
104     static Comparator PathAndValueComparator = new Comparator() {
105         @Override
106         public int compare(Object o1, Object o2) {
107             return ((VettingInfo) o1).compareByPathAndValue((VettingInfo) o2);
108         }
109     };
110 
111     static class VettingInfoSet {
112         private Map<String, List<VettingInfo>> path_vettingInfoList = new TreeMap<>();
113 
add(String path, String dir, String fullPath, String value)114         public void add(String path, String dir, String fullPath, String value) {
115             VettingInfo vi = new VettingInfo(dir, fullPath, value);
116             List<VettingInfo> s = path_vettingInfoList.get(path);
117             if (s == null) path_vettingInfoList.put(path, s = new ArrayList<>(1));
118             s.add(vi);
119         }
120 
iterator()121         public Iterator<String> iterator() {
122             return path_vettingInfoList.keySet().iterator();
123         }
124 
get(String path)125         public Collection<VettingInfo> get(String path) {
126             return path_vettingInfoList.get(path);
127         }
128     }
129 
keySet()130     public Set<String> keySet() {
131         return locale_files.keySet();
132     }
133 
incorporateVetting(String locale, String targetDir)134     public void incorporateVetting(String locale, String targetDir) throws IOException {
135         Set<String> s = locale_files.get(locale);
136         Log.logln("Vetting Data for: " + locale);
137         VettingInfoSet accum = new VettingInfoSet();
138         for (Iterator<String> it2 = s.iterator(); it2.hasNext();) {
139             String dir = it2.next() + File.separator;
140             String fixedLocale = "fixed-" + locale + ".xml";
141             fixXML(dir, locale + ".xml", dir, fixedLocale);
142             CLDRFile cldr = SimpleFactory.makeFromFile(dir + fixedLocale, locale, DraftStatus.approved);
143             for (Iterator<String> it3 = cldr.iterator(); it3.hasNext();) {
144                 String path = it3.next();
145                 String value = cldr.getStringValue(path);
146                 String fullPath = cldr.getFullXPath(path);
147                 // skip bogus values
148                 if (value.startsWith("//ldml") || value.length() == 0) {
149                     Log.logln("Skipping: [" + value + "] for " + fullPath);
150                     continue;
151                 }
152                 accum.add(stripAlt(path), dir, stripAlt(fullPath), value);
153             }
154         }
155         // now walk though items. If there is a single value, keep it
156         // otherwise show
157         Set<VettingInfo> uniquePathAndValue = new TreeSet<VettingInfo>(PathAndValueComparator);
158         CLDRFile cldrDelta = SimpleFactory.makeFile(locale);
159         boolean gotOne = false;
160         for (Iterator<String> it2 = accum.iterator(); it2.hasNext();) {
161             String path = it2.next();
162             Collection<VettingInfo> c = accum.get(path);
163             uniquePathAndValue.clear();
164             uniquePathAndValue.addAll(c);
165             if (uniquePathAndValue.size() == 1) { // no conflict
166                 VettingInfo vi = uniquePathAndValue.iterator().next();
167                 cldrDelta.add(vi.fullPath, vi.value);
168                 gotOne = true;
169             } else { // there is a conflict
170                 conflictSet.add(new Object[] { locale, path, c });
171             }
172         }
173         if (gotOne) {
174             Log.logln("Writing: " + targetDir + locale + ".xml");
175             PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, locale + ".xml");
176             cldrDelta.write(pw);
177             pw.close();
178         } else {
179             Log.logln("No data left in: " + targetDir + locale + ".xml");
180         }
181     }
182 
showSources()183     public void showSources() {
184         for (Iterator<String> it = locale_files.keySet().iterator(); it.hasNext();) {
185             String key = it.next();
186             Set<String> s = locale_files.get(key);
187             for (Iterator<String> it2 = s.iterator(); it2.hasNext();) {
188                 Log.logln(key + " \t" + it2.next());
189                 key = "";
190             }
191         }
192     }
193 
fixXML(String inputDir, String inputFile, String outputDir, String outputFile)194     public void fixXML(String inputDir, String inputFile, String outputDir, String outputFile) throws IOException {
195         BufferedReader in = FileUtilities.openUTF8Reader(inputDir, inputFile);
196         PrintWriter out = FileUtilities.openUTF8Writer(outputDir, outputFile);
197         int haveLanguages = 0, haveScripts = 0, haveTerritories = 0, haveVariants = 0, haveKeys = 0, haveTypes = 0;
198         int inLocaleDisplayNames = 0;
199         while (true) {
200             String line = in.readLine();
201             if (line == null) break;
202             String trimmed = line.trim();
203 
204             if (inLocaleDisplayNames == 1) {
205                 haveLanguages = fixItem(out, haveLanguages, trimmed, "<language ", "languages");
206                 haveScripts = fixItem(out, haveScripts, trimmed, "<script ", "scripts");
207                 haveTerritories = fixItem(out, haveTerritories, trimmed, "<territory ", "territories");
208                 haveVariants = fixItem(out, haveVariants, trimmed, "<variant ", "variants");
209                 haveKeys = fixItem(out, haveKeys, trimmed, "<key ", "keys");
210                 haveTypes = fixItem(out, haveTypes, trimmed, "<type ", "types");
211             }
212 
213             if (trimmed.startsWith("<localeDisplayNames"))
214                 inLocaleDisplayNames = 1;
215             else if (trimmed.startsWith("</localeDisplayNames")) inLocaleDisplayNames = 2;
216 
217             out.println(line);
218         }
219         in.close();
220         out.close();
221     }
222 
223     /**
224      *
225      */
fixItem(PrintWriter out, int haveLanguages, String trimmed, String item, String fix)226     private int fixItem(PrintWriter out, int haveLanguages, String trimmed, String item, String fix) {
227         if (trimmed.startsWith(item)) {
228             if (haveLanguages == 0) {
229                 out.println("<" + fix + ">");
230                 haveLanguages = 1;
231             }
232             return haveLanguages;
233         }
234         if (haveLanguages == 1) {
235             out.println("</" + fix + ">");
236             haveLanguages = 2;
237         }
238         return haveLanguages;
239     }
240 
241     /**
242      * @return Returns the conflictSet.
243      */
getConflictSet()244     public Set<Object[]> getConflictSet() {
245         return conflictSet;
246     }
247 
248     /**
249      * @param cldrFactory
250      * @throws IOException
251      *
252      */
showFiles(Factory cldrFactory, String targetDir)253     public void showFiles(Factory cldrFactory, String targetDir) throws IOException {
254         english = cldrFactory.make("en", true);
255 
256         Log.logln("");
257         Log.logln("A. Sources");
258         Log.logln("");
259         showSources();
260 
261         Log.logln("");
262         Log.logln("B. Intermediate Results");
263         Log.logln("");
264         Set<String> vettedLocales = keySet();
265         for (Iterator<String> it = vettedLocales.iterator(); it.hasNext();) {
266             incorporateVetting(it.next(), targetDir);
267         }
268 
269         Log.logln("");
270         Log.logln("C. Conflicts");
271         Log.logln("");
272         showConflicts(cldrFactory);
273 
274         Log.logln("");
275         Log.logln("D. Missing Vetting");
276         Log.logln("");
277 
278         Set<String> availableLocales = new TreeSet<>(cldrFactory.getAvailable());
279         availableLocales.removeAll(vettedLocales);
280 
281         for (Iterator<String> it = availableLocales.iterator(); it.hasNext();) {
282             String locale = it.next();
283             CLDRFile cldr = cldrFactory.make(locale, false);
284             for (Iterator<String> it2 = cldr.iterator(); it2.hasNext();) {
285                 String path = it2.next();
286                 String fullPath = cldr.getFullXPath(path);
287                 if (fullPath.indexOf("[@draft=") >= 0) {
288                     Log.logln(locale + " \t" + english.getName(locale) + "\texample: " + fullPath);
289                     break;
290                 }
291             }
292         }
293     }
294 
    /** English CLDR file; assigned in showFiles and read when logging locale names and English values. */
    CLDRFile english;
296 
297     /**
298      *
299      */
showConflicts(Factory cldrFactory)300     private void showConflicts(Factory cldrFactory) {
301 
302         Set<Object[]> s = getConflictSet();
303         String lastLocale = "";
304         CLDRFile cldr = null;
305         Transliterator any_latin = Transliterator.getInstance("any-latin");
306         Set<String> emails = new LinkedHashSet<>();
307         String[] pieces = new String[5];
308 
309         for (Iterator<Object[]> it = s.iterator(); it.hasNext();) {
310             Object[] items = it.next();
311             String entry = "";
312             if (!lastLocale.equals(items[0])) {
313                 showSet(emails);
314                 lastLocale = (String) items[0];
315                 cldr = cldrFactory.make(lastLocale, false);
316                 entry = "==========" + Utility.LINE_SEPARATOR + lastLocale + Utility.LINE_SEPARATOR;
317             }
318             String path = CLDRFile.getDistinguishingXPath((String) items[1], null);
319             String current = cldr.getStringValue(path);
320             entry += "\tpath:\t" + path + Utility.LINE_SEPARATOR + "\tcurrent value:\t" + getValue(any_latin, current)
321                 + Utility.LINE_SEPARATOR;
322 
323             entry += "\tEnglish value:\t" + getValue(any_latin, english.getStringValue(path)) + Utility.LINE_SEPARATOR;
324             Collection<VettingInfo> c = (Collection<VettingInfo>) items[2];
325             for (Iterator<VettingInfo> it2 = c.iterator(); it2.hasNext();) {
326                 VettingInfo vi = it2.next();
327                 entry += "\t\tvalue:\t" + getValue(any_latin, vi.value) + "\t source: " + vi.dir
328                     + Utility.LINE_SEPARATOR;
329                 // get third field, that's the email
330                 Utility.split(vi.dir, '\\', pieces);
331                 emails.add(pieces[2]);
332             }
333 
334             if (false) {
335                 System.out.println("path: " + path);
336                 for (int i = 0; i < items.length; ++i) {
337                     System.out.println("item[" + i + "]: " + items[i]);
338                 }
339             }
340             Log.logln(entry);
341         }
342         showSet(emails);
343     }
344 
345     /**
346      *
347      */
showSet(Set<String> emails)348     private void showSet(Set<String> emails) {
349         if (emails.size() == 0) return;
350         String result = "Emails:\t";
351         for (Iterator<String> it = emails.iterator(); it.hasNext();) {
352             result += it.next() + ", ";
353         }
354         result += "cldr@unicode.org";
355         emails.clear();
356         Log.logln(result);
357     }
358 
359     /**
360      *
361      */
getValue(Transliterator some, String current)362     private String getValue(Transliterator some, String current) {
363         if (current == null) current = "NULL";
364         String other = some.transliterate(current);
365         return "<" + current + ">" + (other.equals(current) ? "" : "\t[" + other + "]");
366     }
367 
368     /**
369      *
370      */
stripAlt(String path)371     private String stripAlt(String path) {
372         XPathParts tempParts = XPathParts.getFrozenInstance(path);
373         Map<String, String> x = tempParts.getAttributes(tempParts.size() - 1);
374         String value = x.get("alt");
375         if (value != null && value.startsWith("proposed")) {
376             x.remove("alt");
377             // System.out.println(path + "\t=>\t" + tempParts.toString());
378             return tempParts.toString();
379         }
380         return path;
381     }
382 }
383