package org.unicode.cldr.tool;

import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.ChainedMap;
import org.unicode.cldr.util.ChainedMap.M3;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.With;
import org.unicode.cldr.util.XPathParts;

import com.google.common.base.Splitter;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.util.Output;

/**
 * Command-line tool that compares a base locale (the {@code filter} option, default
 * {@code en_001}) with every locale that has it as an ancestor, across all
 * {@code DtdType.ldml} directories under {@link CLDRPaths#COMMON_DIRECTORY}.
 *
 * <p>The result is written to standard output as a tab-separated table: one column per
 * locale and one row per {@link PathHeader} for which the collected values are not all
 * identical.
 */
public class DiffCldr {
    private static final CLDRConfig CONFIG = CLDRConfig.getInstance();

    // ADD OPTIONS LATER

    /** Command-line options understood by this tool. */
    enum MyOptions {
        //organization(".*", "CLDR", "organization"),
        filter(".*", "en_001", "locale ancestor"),
        ;

        // BOILERPLATE TO COPY
        final Option option;

        private MyOptions(String argumentPattern, String defaultArgument, String helpText) {
            option = new Option(this, argumentPattern, defaultArgument, helpText);
        }

        static Options myOptions = new Options();
        static {
            for (MyOptions option : MyOptions.values()) {
                myOptions.add(option, option.option);
            }
        }

        /**
         * Parses the command line into the registered options.
         *
         * @param args the raw command-line arguments
         * @param showArguments forwarded to {@code Options.parse}
         * @return whatever {@code Options.parse} returns for these arguments
         */
        private static Set<String> parse(String[] args, boolean showArguments) {
            // Previously hard-coded to true, silently ignoring the parameter.
            return myOptions.parse(MyOptions.values()[0], args, showArguments);
        }
    }

    /**
     * Loads the data for the base locale and its descendants, then prints the table of
     * differing values.
     *
     * @param args command-line arguments; see {@link MyOptions}
     */
    public static void main(String[] args) {
        MyOptions.parse(args, true);
        String localeBase = MyOptions.filter.option.getValue();

        String dirBase = CLDRPaths.COMMON_DIRECTORY;
        PathHeader.Factory phf = PathHeader.getFactory(CONFIG.getEnglish());

        // load data

        // pathHeader -> locale -> value
        M3<PathHeader, String, String> data = ChainedMap.of(new TreeMap<PathHeader, Object>(), new TreeMap<String, Object>(), String.class);
        Counter<String> localeCounter = new Counter<>();
        Counter<PathHeader> pathHeaderCounter = new Counter<>();
        int total = 0;
        Output<String> pathWhereFound = new Output<>();
        Output<String> localeWhereFound = new Output<>();
        // Receives the attribute-carried values for the path currently being processed.
        // NOTE(review): presumably getRegularizedPaths resets this multimap on every call;
        // confirm, otherwise entries from earlier paths would be re-added below.
        Multimap<String, String> extras = TreeMultimap.create();

        for (String dir : DtdType.ldml.directories) {
            Factory factory = SimpleFactory.make(dirBase + dir, ".*");
            Set<String> available = factory.getAvailable();
            if (!available.contains(localeBase)) {
                continue; // this directory has no data for the base locale
            }

            // Base locale first, then every available locale descending from it.
            Set<String> locales = new LinkedHashSet<>();
            locales.add(localeBase);
            for (String locale : available) {
                if (hasAncestor(locale, localeBase)) {
                    locales.add(locale);
                }
            }

            for (String locale : locales) {
                boolean isBase = locale.equals(localeBase);
                // The base locale is loaded resolved; descendants are loaded unresolved.
                CLDRFile cldrFile = factory.make(locale, isBase);
                DtdData dtdData = cldrFile.getDtdData();
                CLDRFile cldrFileResolved = factory.make(locale, true);

                for (String distinguishedPath : With.in(cldrFile.iterator())) {
                    String path = cldrFile.getFullXPath(distinguishedPath);

                    XPathParts pathPlain = XPathParts.getFrozenInstance(path);
                    if (dtdData.isMetadata(pathPlain)) {
                        continue;
                    }
                    if (pathPlain.getElement(1).equals("identity")) {
                        continue;
                    }
                    String value = cldrFile.getStringValue(distinguishedPath);
                    String bailey = cldrFileResolved.getBaileyValue(distinguishedPath, pathWhereFound, localeWhereFound);

                    // one of the attributes might be a value (ugg)
                    // so check for that, and extract the value

                    Set<String> pathForValues = dtdData.getRegularizedPaths(pathPlain, extras);
                    // For non-base locales, skip values equal to the Bailey value.
                    if (pathForValues != null && (isBase || !value.equals(bailey))) {
                        for (String pathForValue : pathForValues) {
                            PathHeader ph = phf.fromPath(pathForValue);
                            Splitter splitter = DtdData.getValueSplitter(pathPlain);
                            String cleanedValue = joinValues(pathPlain, splitter.splitToList(value));
                            total = addValue(data, locale, ph, cleanedValue, total, localeCounter, pathHeaderCounter);
                        }
                    }

                    // there are value attributes, so do them

                    for (Entry<String, Collection<String>> entry : extras.asMap().entrySet()) {
                        final String extraPath = entry.getKey();
                        final PathHeader ph = phf.fromPath(extraPath);
                        final Collection<String> extraValues = entry.getValue();
                        String cleanedValue = joinValues(pathPlain, extraValues);
                        total = addValue(data, locale, ph, cleanedValue, total, localeCounter, pathHeaderCounter);
                    }
                    if (pathForValues == null && !value.isEmpty()) {
                        System.err.println("Shouldn't happen");
                    }
                }
            }
        }
        Set<String> localeList = localeCounter.getKeysetSortedByCount(false);

        // now print differences

        // Header row: fixed columns followed by one column per locale.
        System.out.print("№\tSection\tPage\tHeader\tCode\tCount");
        for (String locale : localeList) {
            System.out.print("\t" + locale);
        }
        System.out.println();

        // Totals row: overall item count, then the item count of each locale.
        System.out.print("\t\t\t\tCount\t" + total);
        for (String locale : localeList) {
            System.out.print("\t" + localeCounter.get(locale));
        }
        System.out.println();

        int rowNumber = 0;
        for (PathHeader ph : data.keySet()) {
            final Map<String, String> localeToValue = data.get(ph);
            Set<String> distinctValues = new TreeSet<>(localeToValue.values());
            if (distinctValues.size() <= 1) {
                continue; // every locale that has this item agrees on its value
            }

            // have difference, so print

            System.out.print(++rowNumber + "\t" + ph + "\t" + pathHeaderCounter.get(ph));
            for (String locale : localeList) {
                System.out.print("\t" + CldrUtility.ifNull(localeToValue.get(locale), ""));
            }
            System.out.println();
        }
    }

    /**
     * Tests whether {@code localeBase} is {@code locale} itself or one of its ancestors,
     * following {@link LocaleIDParser#getParent(String)} until it returns {@code null}.
     *
     * @param locale the locale whose parent chain is walked; may be {@code null}
     * @param localeBase the locale to look for in that chain
     * @return {@code true} if {@code localeBase} is found in the chain
     */
    private static boolean hasAncestor(String locale, String localeBase) {
        for (String current = locale; current != null; current = LocaleIDParser.getParent(current)) {
            if (current.equals(localeBase)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Records {@code value} for the (ph, locale) pair and updates the counters.
     *
     * <p>Empty values, and values for a (ph, locale) pair that is already present, are
     * skipped: nothing is stored and no counter changes.
     *
     * @param data the pathHeader -> locale -> value table to add to
     * @param locale the locale the value belongs to
     * @param ph the path header the value belongs to
     * @param value the value to record
     * @param total the running count of items recorded so far
     * @param localeCounter count of items per locale
     * @param pathHeaderCounter count of items with the same path header, across locales
     * @return the new running total: {@code total + 1} if the value was recorded,
     *         otherwise {@code total} unchanged
     */
    private static int addValue(M3<PathHeader, String, String> data, String locale, PathHeader ph, String value,
        int total, Counter<String> localeCounter, Counter<PathHeader> pathHeaderCounter) {
        if (value.isEmpty()) {
            // Callers assign the result back to their running total, so it must be
            // returned unchanged here; returning 0 used to reset the count.
            return total;
        }
        String old = data.get(ph, locale);
        if (old != null) {
            return total; // suppress duplicates, leaving the running total intact
        }
        data.put(ph, locale, value);
        // add to counts
        localeCounter.add(locale, 1); // count of items in locale
        pathHeaderCounter.add(ph, 1); // count of items with same pathHeader, across locales
        return total + 1;
    }

    /**
     * Fixes values that are multiple lines or multiple items by flattening them into a
     * single string.
     *
     * <p>Items for which {@code DtdData.isComment} returns true are dropped, and
     * repeated items are kept only once, in first-seen order. The remaining items are
     * joined with {@code " ␍ "}, re-split with {@code DtdData.CR_SPLITTER}, and joined
     * with {@code " ␍ "} again.
     *
     * @param pathPlain the path the values belong to, used for the comment check
     * @param values the raw items to combine
     * @return the combined single-line value
     */
    private static String joinValues(XPathParts pathPlain, Collection<String> values) {
        Set<String> cleanedValues = new LinkedHashSet<>();
        for (String item : values) {
            if (!DtdData.isComment(pathPlain, item)) {
                cleanedValues.add(item);
            }
        }
        // Join the filtered items; the unfiltered 'values' was joined here before,
        // which made the comment filtering above a no-op.
        String joined = CollectionUtilities.join(cleanedValues, " ␍ ");
        return CollectionUtilities.join(DtdData.CR_SPLITTER.split(joined), " ␍ ");
    }
}