• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.tool;
8 
9 import java.io.BufferedReader;
10 import java.io.File;
11 import java.io.IOException;
12 import java.io.PrintWriter;
13 import java.util.ArrayList;
14 import java.util.Collection;
15 import java.util.Comparator;
16 import java.util.Iterator;
17 import java.util.LinkedHashSet;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.TreeMap;
22 import java.util.TreeSet;
23 
24 import org.unicode.cldr.draft.FileUtilities;
25 import org.unicode.cldr.util.ArrayComparator;
26 import org.unicode.cldr.util.CLDRFile;
27 import org.unicode.cldr.util.CLDRFile.DraftStatus;
28 import org.unicode.cldr.util.Factory;
29 import org.unicode.cldr.util.Log;
30 import org.unicode.cldr.util.SimpleFactory;
31 import org.unicode.cldr.util.XPathParts;
32 
33 import com.ibm.icu.impl.Utility;
34 import com.ibm.icu.text.Transliterator;
35 import com.ibm.icu.text.UTF16;
36 
37 /**
38  * Tool for applying modifications to the CLDR files. Use -h to see the options.
39  * <p>
40  * There are some environment variables that can be used with the program <br>
41  * -DSHOW_FILES=<anything> shows all create/open of files.
42  */
43 public class VettingAdder {
44 
45     private Map<String, Set<String>> locale_files = new TreeMap<String, Set<String>>();
46     private Comparator<String> scomp = new UTF16.StringComparator();
47     private Set<Object[]> conflictSet = new TreeSet<Object[]>(
48         new ArrayComparator(new Comparator[] { scomp, scomp, scomp }));
49 
VettingAdder(String sourceDirectory)50     public VettingAdder(String sourceDirectory) throws IOException {
51         addFiles(sourceDirectory);
52     }
53 
addFiles(String sourceDirectory)54     private void addFiles(String sourceDirectory) throws IOException {
55         File f = new File(sourceDirectory);
56         String canonicalName = f.getCanonicalPath();
57         if (!f.isDirectory()) {
58             String name = f.getName();
59             if (name.startsWith("fixed-")) return; // skip
60             if (name.equals(".htaccess")) return; // skip
61             if (!name.endsWith(".xml")) {
62                 Log.logln("Wrong filename format: " + f.getCanonicalPath());
63                 return;
64             }
65             String localeName = name.substring(0, name.length() - 4);
66             Set<String> s = locale_files.get(localeName);
67             if (s == null) {
68                 locale_files.put(localeName, s = new TreeSet<String>());
69             }
70             s.add(f.getParent());
71         } else {
72             String[] subnames = f.list();
73             for (int i = 0; i < subnames.length; ++i) {
74                 addFiles(canonicalName + File.separatorChar + subnames[i]);
75             }
76         }
77     }
78 
79     static class VettingInfo {
80         private String value;
81         private String fullPath;
82         private String dir;
83 
VettingInfo(String dir, String fullPath, String value)84         public VettingInfo(String dir, String fullPath, String value) {
85             this.value = value;
86             this.fullPath = fullPath;
87             this.dir = dir;
88         }
89 
toString()90         public String toString() {
91             return "source: " + dir + ";\t value: <" + value + ">";
92         }
93 
compareByPathAndValue(VettingInfo other)94         public int compareByPathAndValue(VettingInfo other) {
95             int result;
96             if (0 != (result = fullPath.compareTo(other.fullPath))) return result;
97             if (0 != (result = value.compareTo(other.value))) return result;
98             return 0;
99         }
100     }
101 
102     static Comparator PathAndValueComparator = new Comparator() {
103         public int compare(Object o1, Object o2) {
104             return ((VettingInfo) o1).compareByPathAndValue((VettingInfo) o2);
105         }
106     };
107 
108     static class VettingInfoSet {
109         private Map<String, List<VettingInfo>> path_vettingInfoList = new TreeMap<String, List<VettingInfo>>();
110 
add(String path, String dir, String fullPath, String value)111         public void add(String path, String dir, String fullPath, String value) {
112             VettingInfo vi = new VettingInfo(dir, fullPath, value);
113             List<VettingInfo> s = path_vettingInfoList.get(path);
114             if (s == null) path_vettingInfoList.put(path, s = new ArrayList<VettingInfo>(1));
115             s.add(vi);
116         }
117 
iterator()118         public Iterator<String> iterator() {
119             return path_vettingInfoList.keySet().iterator();
120         }
121 
get(String path)122         public Collection<VettingInfo> get(String path) {
123             return path_vettingInfoList.get(path);
124         }
125     }
126 
keySet()127     public Set<String> keySet() {
128         return locale_files.keySet();
129     }
130 
incorporateVetting(String locale, String targetDir)131     public void incorporateVetting(String locale, String targetDir) throws IOException {
132         Set<String> s = locale_files.get(locale);
133         Log.logln("Vetting Data for: " + locale);
134         VettingInfoSet accum = new VettingInfoSet();
135         for (Iterator<String> it2 = s.iterator(); it2.hasNext();) {
136             String dir = it2.next() + File.separator;
137             String fixedLocale = "fixed-" + locale + ".xml";
138             fixXML(dir, locale + ".xml", dir, fixedLocale);
139             CLDRFile cldr = SimpleFactory.makeFromFile(dir + fixedLocale, locale, DraftStatus.approved);
140             for (Iterator<String> it3 = cldr.iterator(); it3.hasNext();) {
141                 String path = it3.next();
142                 String value = cldr.getStringValue(path);
143                 String fullPath = cldr.getFullXPath(path);
144                 // skip bogus values
145                 if (value.startsWith("//ldml") || value.length() == 0) {
146                     Log.logln("Skipping: [" + value + "] for " + fullPath);
147                     continue;
148                 }
149                 accum.add(stripAlt(path), dir, stripAlt(fullPath), value);
150             }
151         }
152         // now walk though items. If there is a single value, keep it
153         // otherwise show
154         Set<VettingInfo> uniquePathAndValue = new TreeSet<VettingInfo>(PathAndValueComparator);
155         CLDRFile cldrDelta = SimpleFactory.makeFile(locale);
156         boolean gotOne = false;
157         for (Iterator<String> it2 = accum.iterator(); it2.hasNext();) {
158             String path = it2.next();
159             Collection<VettingInfo> c = accum.get(path);
160             uniquePathAndValue.clear();
161             uniquePathAndValue.addAll(c);
162             if (uniquePathAndValue.size() == 1) { // no conflict
163                 VettingInfo vi = uniquePathAndValue.iterator().next();
164                 cldrDelta.add(vi.fullPath, vi.value);
165                 gotOne = true;
166             } else { // there is a conflict
167                 conflictSet.add(new Object[] { locale, path, c });
168             }
169         }
170         if (gotOne) {
171             Log.logln("Writing: " + targetDir + locale + ".xml");
172             PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, locale + ".xml");
173             cldrDelta.write(pw);
174             pw.close();
175         } else {
176             Log.logln("No data left in: " + targetDir + locale + ".xml");
177         }
178     }
179 
showSources()180     public void showSources() {
181         for (Iterator<String> it = locale_files.keySet().iterator(); it.hasNext();) {
182             String key = it.next();
183             Set<String> s = locale_files.get(key);
184             for (Iterator<String> it2 = s.iterator(); it2.hasNext();) {
185                 Log.logln(key + " \t" + it2.next());
186                 key = "";
187             }
188         }
189     }
190 
fixXML(String inputDir, String inputFile, String outputDir, String outputFile)191     public void fixXML(String inputDir, String inputFile, String outputDir, String outputFile) throws IOException {
192         BufferedReader in = FileUtilities.openUTF8Reader(inputDir, inputFile);
193         PrintWriter out = FileUtilities.openUTF8Writer(outputDir, outputFile);
194         int haveLanguages = 0, haveScripts = 0, haveTerritories = 0, haveVariants = 0, haveKeys = 0, haveTypes = 0;
195         int inLocaleDisplayNames = 0;
196         while (true) {
197             String line = in.readLine();
198             if (line == null) break;
199             String trimmed = line.trim();
200 
201             if (inLocaleDisplayNames == 1) {
202                 haveLanguages = fixItem(out, haveLanguages, trimmed, "<language ", "languages");
203                 haveScripts = fixItem(out, haveScripts, trimmed, "<script ", "scripts");
204                 haveTerritories = fixItem(out, haveTerritories, trimmed, "<territory ", "territories");
205                 haveVariants = fixItem(out, haveVariants, trimmed, "<variant ", "variants");
206                 haveKeys = fixItem(out, haveKeys, trimmed, "<key ", "keys");
207                 haveTypes = fixItem(out, haveTypes, trimmed, "<type ", "types");
208             }
209 
210             if (trimmed.startsWith("<localeDisplayNames"))
211                 inLocaleDisplayNames = 1;
212             else if (trimmed.startsWith("</localeDisplayNames")) inLocaleDisplayNames = 2;
213 
214             out.println(line);
215         }
216         in.close();
217         out.close();
218     }
219 
220     /**
221      *
222      */
fixItem(PrintWriter out, int haveLanguages, String trimmed, String item, String fix)223     private int fixItem(PrintWriter out, int haveLanguages, String trimmed, String item, String fix) {
224         if (trimmed.startsWith(item)) {
225             if (haveLanguages == 0) {
226                 out.println("<" + fix + ">");
227                 haveLanguages = 1;
228             }
229             return haveLanguages;
230         }
231         if (haveLanguages == 1) {
232             out.println("</" + fix + ">");
233             haveLanguages = 2;
234         }
235         return haveLanguages;
236     }
237 
238     /**
239      * @return Returns the conflictSet.
240      */
getConflictSet()241     public Set<Object[]> getConflictSet() {
242         return conflictSet;
243     }
244 
245     /**
246      * @param cldrFactory
247      * @throws IOException
248      *
249      */
showFiles(Factory cldrFactory, String targetDir)250     public void showFiles(Factory cldrFactory, String targetDir) throws IOException {
251         english = cldrFactory.make("en", true);
252 
253         Log.logln("");
254         Log.logln("A. Sources");
255         Log.logln("");
256         showSources();
257 
258         Log.logln("");
259         Log.logln("B. Intermediate Results");
260         Log.logln("");
261         Set<String> vettedLocales = keySet();
262         for (Iterator<String> it = vettedLocales.iterator(); it.hasNext();) {
263             incorporateVetting(it.next(), targetDir);
264         }
265 
266         Log.logln("");
267         Log.logln("C. Conflicts");
268         Log.logln("");
269         showConflicts(cldrFactory);
270 
271         Log.logln("");
272         Log.logln("D. Missing Vetting");
273         Log.logln("");
274 
275         Set<String> availableLocales = new TreeSet<String>(cldrFactory.getAvailable());
276         availableLocales.removeAll(vettedLocales);
277 
278         for (Iterator<String> it = availableLocales.iterator(); it.hasNext();) {
279             String locale = it.next();
280             CLDRFile cldr = cldrFactory.make(locale, false);
281             for (Iterator<String> it2 = cldr.iterator(); it2.hasNext();) {
282                 String path = it2.next();
283                 String fullPath = cldr.getFullXPath(path);
284                 if (fullPath.indexOf("[@draft=") >= 0) {
285                     Log.logln(locale + " \t" + english.getName(locale) + "\texample: " + fullPath);
286                     break;
287                 }
288             }
289         }
290     }
291 
292     CLDRFile english;
293 
294     /**
295      *
296      */
showConflicts(Factory cldrFactory)297     private void showConflicts(Factory cldrFactory) {
298 
299         Set<Object[]> s = getConflictSet();
300         String lastLocale = "";
301         CLDRFile cldr = null;
302         Transliterator any_latin = Transliterator.getInstance("any-latin");
303         Set<String> emails = new LinkedHashSet<String>();
304         String[] pieces = new String[5];
305 
306         for (Iterator<Object[]> it = s.iterator(); it.hasNext();) {
307             Object[] items = it.next();
308             String entry = "";
309             if (!lastLocale.equals(items[0])) {
310                 showSet(emails);
311                 lastLocale = (String) items[0];
312                 cldr = cldrFactory.make(lastLocale, false);
313                 entry = "==========" + Utility.LINE_SEPARATOR + lastLocale + Utility.LINE_SEPARATOR;
314             }
315             String path = CLDRFile.getDistinguishingXPath((String) items[1], null, false);
316             String current = cldr.getStringValue(path);
317             entry += "\tpath:\t" + path + Utility.LINE_SEPARATOR + "\tcurrent value:\t" + getValue(any_latin, current)
318                 + Utility.LINE_SEPARATOR;
319 
320             entry += "\tEnglish value:\t" + getValue(any_latin, english.getStringValue(path)) + Utility.LINE_SEPARATOR;
321             Collection<VettingInfo> c = (Collection<VettingInfo>) items[2];
322             for (Iterator<VettingInfo> it2 = c.iterator(); it2.hasNext();) {
323                 VettingInfo vi = it2.next();
324                 entry += "\t\tvalue:\t" + getValue(any_latin, vi.value) + "\t source: " + vi.dir
325                     + Utility.LINE_SEPARATOR;
326                 // get third field, that's the email
327                 Utility.split(vi.dir, '\\', pieces);
328                 emails.add(pieces[2]);
329             }
330 
331             if (false) {
332                 System.out.println("path: " + path);
333                 for (int i = 0; i < items.length; ++i) {
334                     System.out.println("item[" + i + "]: " + items[i]);
335                 }
336             }
337             Log.logln(entry);
338         }
339         showSet(emails);
340     }
341 
342     /**
343      *
344      */
showSet(Set<String> emails)345     private void showSet(Set<String> emails) {
346         if (emails.size() == 0) return;
347         String result = "Emails:\t";
348         for (Iterator<String> it = emails.iterator(); it.hasNext();) {
349             result += it.next() + ", ";
350         }
351         result += "cldr@unicode.org";
352         emails.clear();
353         Log.logln(result);
354     }
355 
356     /**
357      *
358      */
getValue(Transliterator some, String current)359     private String getValue(Transliterator some, String current) {
360         if (current == null) current = "NULL";
361         String other = some.transliterate(current);
362         return "<" + current + ">" + (other.equals(current) ? "" : "\t[" + other + "]");
363     }
364 
365     XPathParts tempParts = new XPathParts(null, null);
366 
367     /**
368      *
369      */
stripAlt(String path)370     private String stripAlt(String path) {
371         tempParts.set(path);
372         Map<String, String> x = tempParts.getAttributes(tempParts.size() - 1);
373         String value = x.get("alt");
374         if (value != null && value.startsWith("proposed")) {
375             x.remove("alt");
376             // System.out.println(path + "\t=>\t" + tempParts.toString());
377             return tempParts.toString();
378         }
379         return path;
380     }
381 }