• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.tool;
8 
9 import com.ibm.icu.impl.Utility;
10 import com.ibm.icu.text.Transliterator;
11 import com.ibm.icu.text.UTF16;
12 import java.io.BufferedReader;
13 import java.io.File;
14 import java.io.IOException;
15 import java.io.PrintWriter;
16 import java.util.ArrayList;
17 import java.util.Collection;
18 import java.util.Comparator;
19 import java.util.Iterator;
20 import java.util.LinkedHashSet;
21 import java.util.List;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import org.unicode.cldr.draft.FileUtilities;
27 import org.unicode.cldr.util.ArrayComparator;
28 import org.unicode.cldr.util.CLDRFile;
29 import org.unicode.cldr.util.CLDRFile.DraftStatus;
30 import org.unicode.cldr.util.Factory;
31 import org.unicode.cldr.util.Log;
32 import org.unicode.cldr.util.PathUtilities;
33 import org.unicode.cldr.util.SimpleFactory;
34 import org.unicode.cldr.util.XPathParts;
35 
36 /**
37  * Tool for applying modifications to the CLDR files. Use -h to see the options.
38  *
39  * <p>There are some environment variables that can be used with the program <br>
40  * -DSHOW_FILES=<anything> shows all create/open of files.
41  */
42 public class VettingAdder {
43 
44     private Map<String, Set<String>> locale_files = new TreeMap<>();
45     private Comparator<String> scomp = new UTF16.StringComparator();
46     private Set<Object[]> conflictSet =
47             new TreeSet<Object[]>(new ArrayComparator(new Comparator[] {scomp, scomp, scomp}));
48 
VettingAdder(String sourceDirectory)49     public VettingAdder(String sourceDirectory) throws IOException {
50         addFiles(sourceDirectory);
51     }
52 
addFiles(String sourceDirectory)53     private void addFiles(String sourceDirectory) throws IOException {
54         File f = new File(sourceDirectory);
55         String normalizedPath = PathUtilities.getNormalizedPathString(f);
56         if (!f.isDirectory()) {
57             String name = f.getName();
58             if (name.startsWith("fixed-")) return; // skip
59             if (name.equals(".htaccess")) return; // skip
60             if (!name.endsWith(".xml")) {
61                 Log.logln("Wrong filename format: " + PathUtilities.getNormalizedPathString(f));
62                 return;
63             }
64             String localeName = name.substring(0, name.length() - 4);
65             Set<String> s = locale_files.get(localeName);
66             if (s == null) {
67                 locale_files.put(localeName, s = new TreeSet<>());
68             }
69             s.add(f.getParent());
70         } else {
71             String[] subnames = f.list();
72             for (int i = 0; i < subnames.length; ++i) {
73                 addFiles(normalizedPath + File.separatorChar + subnames[i]);
74             }
75         }
76     }
77 
78     static class VettingInfo {
79         private String value;
80         private String fullPath;
81         private String dir;
82 
VettingInfo(String dir, String fullPath, String value)83         public VettingInfo(String dir, String fullPath, String value) {
84             this.value = value;
85             this.fullPath = fullPath;
86             this.dir = dir;
87         }
88 
89         @Override
toString()90         public String toString() {
91             return "source: " + dir + ";\t value: <" + value + ">";
92         }
93 
compareByPathAndValue(VettingInfo other)94         public int compareByPathAndValue(VettingInfo other) {
95             int result;
96             if (0 != (result = fullPath.compareTo(other.fullPath))) return result;
97             if (0 != (result = value.compareTo(other.value))) return result;
98             return 0;
99         }
100     }
101 
102     static Comparator PathAndValueComparator =
103             new Comparator() {
104                 @Override
105                 public int compare(Object o1, Object o2) {
106                     return ((VettingInfo) o1).compareByPathAndValue((VettingInfo) o2);
107                 }
108             };
109 
110     static class VettingInfoSet {
111         private Map<String, List<VettingInfo>> path_vettingInfoList = new TreeMap<>();
112 
add(String path, String dir, String fullPath, String value)113         public void add(String path, String dir, String fullPath, String value) {
114             VettingInfo vi = new VettingInfo(dir, fullPath, value);
115             List<VettingInfo> s = path_vettingInfoList.get(path);
116             if (s == null) path_vettingInfoList.put(path, s = new ArrayList<>(1));
117             s.add(vi);
118         }
119 
iterator()120         public Iterator<String> iterator() {
121             return path_vettingInfoList.keySet().iterator();
122         }
123 
get(String path)124         public Collection<VettingInfo> get(String path) {
125             return path_vettingInfoList.get(path);
126         }
127     }
128 
keySet()129     public Set<String> keySet() {
130         return locale_files.keySet();
131     }
132 
incorporateVetting(String locale, String targetDir)133     public void incorporateVetting(String locale, String targetDir) throws IOException {
134         Set<String> s = locale_files.get(locale);
135         Log.logln("Vetting Data for: " + locale);
136         VettingInfoSet accum = new VettingInfoSet();
137         for (Iterator<String> it2 = s.iterator(); it2.hasNext(); ) {
138             String dir = it2.next() + File.separator;
139             String fixedLocale = "fixed-" + locale + ".xml";
140             fixXML(dir, locale + ".xml", dir, fixedLocale);
141             CLDRFile cldr =
142                     SimpleFactory.makeFromFile(dir + fixedLocale, locale, DraftStatus.approved);
143             for (Iterator<String> it3 = cldr.iterator(); it3.hasNext(); ) {
144                 String path = it3.next();
145                 String value = cldr.getStringValue(path);
146                 String fullPath = cldr.getFullXPath(path);
147                 // skip bogus values
148                 if (value.startsWith("//ldml") || value.length() == 0) {
149                     Log.logln("Skipping: [" + value + "] for " + fullPath);
150                     continue;
151                 }
152                 accum.add(stripAlt(path), dir, stripAlt(fullPath), value);
153             }
154         }
155         // now walk though items. If there is a single value, keep it
156         // otherwise show
157         Set<VettingInfo> uniquePathAndValue = new TreeSet<VettingInfo>(PathAndValueComparator);
158         CLDRFile cldrDelta = SimpleFactory.makeFile(locale);
159         boolean gotOne = false;
160         for (Iterator<String> it2 = accum.iterator(); it2.hasNext(); ) {
161             String path = it2.next();
162             Collection<VettingInfo> c = accum.get(path);
163             uniquePathAndValue.clear();
164             uniquePathAndValue.addAll(c);
165             if (uniquePathAndValue.size() == 1) { // no conflict
166                 VettingInfo vi = uniquePathAndValue.iterator().next();
167                 cldrDelta.add(vi.fullPath, vi.value);
168                 gotOne = true;
169             } else { // there is a conflict
170                 conflictSet.add(new Object[] {locale, path, c});
171             }
172         }
173         if (gotOne) {
174             Log.logln("Writing: " + targetDir + locale + ".xml");
175             PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, locale + ".xml");
176             cldrDelta.write(pw);
177             pw.close();
178         } else {
179             Log.logln("No data left in: " + targetDir + locale + ".xml");
180         }
181     }
182 
showSources()183     public void showSources() {
184         for (Iterator<String> it = locale_files.keySet().iterator(); it.hasNext(); ) {
185             String key = it.next();
186             Set<String> s = locale_files.get(key);
187             for (Iterator<String> it2 = s.iterator(); it2.hasNext(); ) {
188                 Log.logln(key + " \t" + it2.next());
189                 key = "";
190             }
191         }
192     }
193 
fixXML(String inputDir, String inputFile, String outputDir, String outputFile)194     public void fixXML(String inputDir, String inputFile, String outputDir, String outputFile)
195             throws IOException {
196         BufferedReader in = FileUtilities.openUTF8Reader(inputDir, inputFile);
197         PrintWriter out = FileUtilities.openUTF8Writer(outputDir, outputFile);
198         int haveLanguages = 0,
199                 haveScripts = 0,
200                 haveTerritories = 0,
201                 haveVariants = 0,
202                 haveKeys = 0,
203                 haveTypes = 0;
204         int inLocaleDisplayNames = 0;
205         while (true) {
206             String line = in.readLine();
207             if (line == null) break;
208             String trimmed = line.trim();
209 
210             if (inLocaleDisplayNames == 1) {
211                 haveLanguages = fixItem(out, haveLanguages, trimmed, "<language ", "languages");
212                 haveScripts = fixItem(out, haveScripts, trimmed, "<script ", "scripts");
213                 haveTerritories =
214                         fixItem(out, haveTerritories, trimmed, "<territory ", "territories");
215                 haveVariants = fixItem(out, haveVariants, trimmed, "<variant ", "variants");
216                 haveKeys = fixItem(out, haveKeys, trimmed, "<key ", "keys");
217                 haveTypes = fixItem(out, haveTypes, trimmed, "<type ", "types");
218             }
219 
220             if (trimmed.startsWith("<localeDisplayNames")) inLocaleDisplayNames = 1;
221             else if (trimmed.startsWith("</localeDisplayNames")) inLocaleDisplayNames = 2;
222 
223             out.println(line);
224         }
225         in.close();
226         out.close();
227     }
228 
229     /** */
fixItem( PrintWriter out, int haveLanguages, String trimmed, String item, String fix)230     private int fixItem(
231             PrintWriter out, int haveLanguages, String trimmed, String item, String fix) {
232         if (trimmed.startsWith(item)) {
233             if (haveLanguages == 0) {
234                 out.println("<" + fix + ">");
235                 haveLanguages = 1;
236             }
237             return haveLanguages;
238         }
239         if (haveLanguages == 1) {
240             out.println("</" + fix + ">");
241             haveLanguages = 2;
242         }
243         return haveLanguages;
244     }
245 
246     /**
247      * @return Returns the conflictSet.
248      */
getConflictSet()249     public Set<Object[]> getConflictSet() {
250         return conflictSet;
251     }
252 
253     /**
254      * @param cldrFactory
255      * @throws IOException
256      */
showFiles(Factory cldrFactory, String targetDir)257     public void showFiles(Factory cldrFactory, String targetDir) throws IOException {
258         english = cldrFactory.make("en", true);
259 
260         Log.logln("");
261         Log.logln("A. Sources");
262         Log.logln("");
263         showSources();
264 
265         Log.logln("");
266         Log.logln("B. Intermediate Results");
267         Log.logln("");
268         Set<String> vettedLocales = keySet();
269         for (Iterator<String> it = vettedLocales.iterator(); it.hasNext(); ) {
270             incorporateVetting(it.next(), targetDir);
271         }
272 
273         Log.logln("");
274         Log.logln("C. Conflicts");
275         Log.logln("");
276         showConflicts(cldrFactory);
277 
278         Log.logln("");
279         Log.logln("D. Missing Vetting");
280         Log.logln("");
281 
282         Set<String> availableLocales = new TreeSet<>(cldrFactory.getAvailable());
283         availableLocales.removeAll(vettedLocales);
284 
285         for (Iterator<String> it = availableLocales.iterator(); it.hasNext(); ) {
286             String locale = it.next();
287             CLDRFile cldr = cldrFactory.make(locale, false);
288             for (Iterator<String> it2 = cldr.iterator(); it2.hasNext(); ) {
289                 String path = it2.next();
290                 String fullPath = cldr.getFullXPath(path);
291                 if (fullPath.indexOf("[@draft=") >= 0) {
292                     Log.logln(locale + " \t" + english.getName(locale) + "\texample: " + fullPath);
293                     break;
294                 }
295             }
296         }
297     }
298 
299     CLDRFile english;
300 
301     /** */
showConflicts(Factory cldrFactory)302     private void showConflicts(Factory cldrFactory) {
303 
304         Set<Object[]> s = getConflictSet();
305         String lastLocale = "";
306         CLDRFile cldr = null;
307         Transliterator any_latin = Transliterator.getInstance("any-latin");
308         Set<String> emails = new LinkedHashSet<>();
309         String[] pieces = new String[5];
310 
311         for (Iterator<Object[]> it = s.iterator(); it.hasNext(); ) {
312             Object[] items = it.next();
313             String entry = "";
314             if (!lastLocale.equals(items[0])) {
315                 showSet(emails);
316                 lastLocale = (String) items[0];
317                 cldr = cldrFactory.make(lastLocale, false);
318                 entry = "==========" + Utility.LINE_SEPARATOR + lastLocale + Utility.LINE_SEPARATOR;
319             }
320             String path = CLDRFile.getDistinguishingXPath((String) items[1], null);
321             String current = cldr.getStringValue(path);
322             entry +=
323                     "\tpath:\t"
324                             + path
325                             + Utility.LINE_SEPARATOR
326                             + "\tcurrent value:\t"
327                             + getValue(any_latin, current)
328                             + Utility.LINE_SEPARATOR;
329 
330             entry +=
331                     "\tEnglish value:\t"
332                             + getValue(any_latin, english.getStringValue(path))
333                             + Utility.LINE_SEPARATOR;
334             Collection<VettingInfo> c = (Collection<VettingInfo>) items[2];
335             for (Iterator<VettingInfo> it2 = c.iterator(); it2.hasNext(); ) {
336                 VettingInfo vi = it2.next();
337                 entry +=
338                         "\t\tvalue:\t"
339                                 + getValue(any_latin, vi.value)
340                                 + "\t source: "
341                                 + vi.dir
342                                 + Utility.LINE_SEPARATOR;
343                 // get third field, that's the email
344                 Utility.split(vi.dir, '\\', pieces);
345                 emails.add(pieces[2]);
346             }
347 
348             if (false) {
349                 System.out.println("path: " + path);
350                 for (int i = 0; i < items.length; ++i) {
351                     System.out.println("item[" + i + "]: " + items[i]);
352                 }
353             }
354             Log.logln(entry);
355         }
356         showSet(emails);
357     }
358 
359     /** */
showSet(Set<String> emails)360     private void showSet(Set<String> emails) {
361         if (emails.size() == 0) return;
362         String result = "Emails:\t";
363         for (Iterator<String> it = emails.iterator(); it.hasNext(); ) {
364             result += it.next() + ", ";
365         }
366         result += "cldr@unicode.org";
367         emails.clear();
368         Log.logln(result);
369     }
370 
371     /** */
getValue(Transliterator some, String current)372     private String getValue(Transliterator some, String current) {
373         if (current == null) current = "NULL";
374         String other = some.transliterate(current);
375         return "<" + current + ">" + (other.equals(current) ? "" : "\t[" + other + "]");
376     }
377 
378     /** */
stripAlt(String path)379     private String stripAlt(String path) {
380         XPathParts tempParts = XPathParts.getFrozenInstance(path);
381         Map<String, String> x = tempParts.getAttributes(tempParts.size() - 1);
382         String value = x.get("alt");
383         if (value != null && value.startsWith("proposed")) {
384             x.remove("alt");
385             // System.out.println(path + "\t=>\t" + tempParts.toString());
386             return tempParts.toString();
387         }
388         return path;
389     }
390 }
391