• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.util.Collection;
4 import java.util.LinkedHashSet;
5 import java.util.Map;
6 import java.util.Map.Entry;
7 import java.util.Set;
8 import java.util.TreeMap;
9 import java.util.TreeSet;
10 
11 import org.unicode.cldr.tool.Option.Options;
12 import org.unicode.cldr.util.CLDRConfig;
13 import org.unicode.cldr.util.CLDRFile;
14 import org.unicode.cldr.util.CLDRPaths;
15 import org.unicode.cldr.util.ChainedMap;
16 import org.unicode.cldr.util.ChainedMap.M3;
17 import org.unicode.cldr.util.CldrUtility;
18 import org.unicode.cldr.util.Counter;
19 import org.unicode.cldr.util.DtdData;
20 import org.unicode.cldr.util.DtdType;
21 import org.unicode.cldr.util.Factory;
22 import org.unicode.cldr.util.LocaleIDParser;
23 import org.unicode.cldr.util.PathHeader;
24 import org.unicode.cldr.util.SimpleFactory;
25 import org.unicode.cldr.util.With;
26 import org.unicode.cldr.util.XPathParts;
27 
28 import com.google.common.base.Joiner;
29 import com.google.common.base.Splitter;
30 import com.google.common.collect.Multimap;
31 import com.google.common.collect.TreeMultimap;
32 import com.ibm.icu.util.Output;
33 
34 public class DiffCldr {
35     private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
36 
37     // ADD OPTIONS LATER
38 
39     enum MyOptions {
40         //organization(".*", "CLDR", "organization"),
41         filter(".*", "en_001", "locale ancestor"),
42         ;
43 
44         // BOILERPLATE TO COPY
45         final Option option;
46 
MyOptions(String argumentPattern, String defaultArgument, String helpText)47         private MyOptions(String argumentPattern, String defaultArgument, String helpText) {
48             option = new Option(this, argumentPattern, defaultArgument, helpText);
49         }
50 
51         static Options myOptions = new Options();
52         static {
53             for (MyOptions option : MyOptions.values()) {
myOptions.add(option, option.option)54                 myOptions.add(option, option.option);
55             }
56         }
57 
parse(String[] args, boolean showArguments)58         private static Set<String> parse(String[] args, boolean showArguments) {
59             return myOptions.parse(MyOptions.values()[0], args, true);
60         }
61     }
62 
main(String[] args)63     public static void main(String[] args) {
64         MyOptions.parse(args, true);
65         String localeBase = MyOptions.filter.option.getValue();
66 
67         String dirBase = CLDRPaths.COMMON_DIRECTORY;
68         PathHeader.Factory phf = PathHeader.getFactory(CONFIG.getEnglish());
69 
70         // load data
71 
72         M3<PathHeader, String, String> data = ChainedMap.of(new TreeMap<PathHeader, Object>(), new TreeMap<String, Object>(), String.class);
73         Counter<String> localeCounter = new Counter<>();
74         Counter<PathHeader> pathHeaderCounter = new Counter<>();
75         int total = 0;
76         Output<String> pathWhereFound = new Output<>();
77         Output<String> localeWhereFound = new Output<>();
78 //        Output<String> reformattedValue = new Output<String>();
79 //        Output<Boolean> hasReformattedValue = new Output<Boolean>();
80         Multimap<String, String> extras = TreeMultimap.create();
81 
82         for (String dir : DtdType.ldml.directories) {
83             Factory factory = SimpleFactory.make(dirBase + dir, ".*");
84             Set<String> available = factory.getAvailable();
85             Set<String> locales = new LinkedHashSet<>();
86             if (!available.contains(localeBase)) {
87                 continue;
88             }
89             locales.add(localeBase);
90             for (String locale : available) {
91                 if (hasAncestor(locale, localeBase)) {
92                     locales.add(locale);
93                 }
94             }
95             for (String locale : locales) {
96                 if (locale.equals("en_WS")) {
97                     int debug = 0;
98                 }
99                 boolean isBase = locale.equals(localeBase);
100                 CLDRFile cldrFile = factory.make(locale, isBase);
101                 DtdData dtdData = cldrFile.getDtdData();
102                 CLDRFile cldrFileResolved = factory.make(locale, true);
103                 for (String distinguishedPath : With.in(cldrFile.iterator())) {
104                     String path = cldrFile.getFullXPath(distinguishedPath);
105 
106                     XPathParts pathPlain = XPathParts.getFrozenInstance(path);
107                     if (dtdData.isMetadata(pathPlain)) {
108                         continue;
109                     }
110                     if (pathPlain.getElement(1).equals("identity")) {
111                         continue;
112                     }
113                     String value = cldrFile.getStringValue(distinguishedPath);
114                     String bailey = cldrFileResolved.getBaileyValue(distinguishedPath, pathWhereFound, localeWhereFound);
115 
116                     // one of the attributes might be a value (ugg)
117                     // so check for that, and extract the value
118 
119                     Set<String> pathForValues = dtdData.getRegularizedPaths(pathPlain, extras);
120                     if (pathForValues != null && (isBase || !value.equals(bailey))) {
121                         for (String pathForValue : pathForValues) {
122                             PathHeader ph = phf.fromPath(pathForValue);
123                             Splitter splitter = DtdData.getValueSplitter(pathPlain);
124                             String cleanedValue = joinValues(pathPlain, splitter.splitToList(value));
125                             total = addValue(data, locale, ph, cleanedValue, total, localeCounter, pathHeaderCounter);
126                         }
127                     }
128 
129                     // there are value attributes, so do them
130 
131                     for (Entry<String, Collection<String>> entry : extras.asMap().entrySet()) {
132                         final String extraPath = entry.getKey();
133                         final PathHeader ph = phf.fromPath(extraPath);
134                         final Collection<String> extraValues = entry.getValue();
135                         String cleanedValue = joinValues(pathPlain, extraValues);
136                         total = addValue(data, locale, ph, cleanedValue, total, localeCounter, pathHeaderCounter);
137                     }
138                     if (pathForValues == null && !value.isEmpty()) {
139                         System.err.println("Shouldn't happen");
140                     }
141                 }
142             }
143         }
144         Set<String> localeList = localeCounter.getKeysetSortedByCount(false);
145 
146         // now print differences
147         Set<String> currentValues = new TreeSet<>();
148         System.out.print("№\tSection\tPage\tHeader\tCode\tCount");
149         for (String locale : localeList) {
150             System.out.print("\t" + locale);
151         }
152         System.out.println();
153         System.out.print("\t\t\t\tCount\t" + total);
154         for (String locale : localeList) {
155             System.out.print("\t" + localeCounter.get(locale));
156         }
157         System.out.println();
158 
159         int sort = 0;
160         for (PathHeader ph : data.keySet()) {
161             String firstValue = null;
162             currentValues.clear();
163             final Map<String, String> localeToValue = data.get(ph);
164             currentValues.addAll(localeToValue.values());
165             if (currentValues.size() <= 1) {
166                 continue;
167             }
168 
169             // have difference, so print
170 
171             System.out.print(++sort + "\t" + ph + "\t" + pathHeaderCounter.get(ph));
172             for (String locale : localeList) {
173                 System.out.print("\t" + CldrUtility.ifNull(localeToValue.get(locale), ""));
174             }
175             System.out.println();
176         }
177     }
178 
hasAncestor(String locale, String localeBase)179     private static boolean hasAncestor(String locale, String localeBase) {
180         while (true) {
181             if (locale == null) {
182                 return false;
183             } else if (locale.equals(localeBase)) {
184                 return true;
185             }
186             locale = LocaleIDParser.getParent(locale);
187         }
188     }
189 
190     /**
191      * Add <ph,value) line, recording extra info.
192      */
addValue(M3<PathHeader, String, String> data, String locale, PathHeader ph, String value, int total, Counter<String> localeCounter, Counter<PathHeader> pathHeaderCounter)193     private static int addValue(M3<PathHeader, String, String> data, String locale, PathHeader ph, String value,
194         int total, Counter<String> localeCounter, Counter<PathHeader> pathHeaderCounter) {
195         if (value.isEmpty()) {
196             return 0;
197         }
198         String old = data.get(ph, locale);
199         if (old != null) {
200             return 0; // suppress duplicates
201         }
202         data.put(ph, locale, value);
203         // add to counts
204         ++total;
205         localeCounter.add(locale, 1); // count of items in locale
206         pathHeaderCounter.add(ph, 1); // count of items with same pathHeader, across locales
207         return total;
208     }
209 
210     /**
211      * Fix values that are multiple lines or multiple items
212      */
joinValues(XPathParts pathPlain, Collection<String> values)213     private static String joinValues(XPathParts pathPlain, Collection<String> values) {
214         Set<String> cleanedValues = new LinkedHashSet<>();
215         for (String item : values) {
216             if (!DtdData.isComment(pathPlain, item)) {
217                 cleanedValues.add(item);
218             }
219         }
220         return Joiner.on(" ␍ ").join(DtdData.CR_SPLITTER.split(Joiner.on(" ␍ ").join(values)));
221     }
222 }
223