1 package org.unicode.cldr.tool; 2 3 import java.util.ArrayList; 4 import java.util.Collection; 5 import java.util.Comparator; 6 import java.util.EnumSet; 7 import java.util.LinkedHashSet; 8 import java.util.List; 9 import java.util.Map; 10 import java.util.Map.Entry; 11 import java.util.Set; 12 import java.util.TreeMap; 13 import java.util.TreeSet; 14 15 import org.unicode.cldr.util.CLDRConfig; 16 import org.unicode.cldr.util.CLDRFile; 17 import org.unicode.cldr.util.Factory; 18 import org.unicode.cldr.util.ICUServiceBuilder; 19 import org.unicode.cldr.util.LanguageTagParser; 20 import org.unicode.cldr.util.Level; 21 import org.unicode.cldr.util.Organization; 22 import org.unicode.cldr.util.PluralRanges; 23 import org.unicode.cldr.util.StandardCodes; 24 import org.unicode.cldr.util.SupplementalDataInfo; 25 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 26 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 27 28 import com.ibm.icu.dev.util.CollectionUtilities; 29 import com.ibm.icu.impl.Relation; 30 import com.ibm.icu.text.DecimalFormat; 31 import com.ibm.icu.text.MessageFormat; 32 import com.ibm.icu.text.PluralRules; 33 import com.ibm.icu.text.PluralRules.FixedDecimal; 34 import com.ibm.icu.util.Output; 35 import com.ibm.icu.util.ULocale; 36 37 public class GeneratePluralRanges { GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo)38 public GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo) { 39 SUPPLEMENTAL = supplementalDataInfo; 40 prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 41 } 42 43 private static final boolean MINIMAL = true; 44 main(String[] args)45 public static void main(String[] args) { 46 CLDRConfig testInfo = ToolConfig.getToolInstance(); 47 GeneratePluralRanges me = new GeneratePluralRanges(testInfo.getSupplementalDataInfo()); 48 me.reformatPluralRanges(); 49 //me.generateSamples(testInfo.getEnglish(), testInfo.getCldrFactory()); 50 } 51 generateSamples(CLDRFile english, Factory factory)52 private void generateSamples(CLDRFile english, Factory factory) { 53 //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns(); 54 // add all the items with plural ranges 55 Set<String> sorted = new TreeSet<String>(SUPPLEMENTAL.getPluralRangesLocales()); 56 // add the core locales 57 // sorted.addAll(StandardCodes.make().getLocaleCoverageLocales("google", EnumSet.of(Level.MODERN))); 58 sorted.addAll(StandardCodes.make().getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN))); 59 // add any variant plural forms 60 LanguageTagParser ltp = new LanguageTagParser(); 61 for (String locale : SUPPLEMENTAL.getPluralLocales()) { 62 if (locale.contains("_")) { 63 if (sorted.contains(ltp.set(locale).getLanguage())) { 64 sorted.add(locale); 65 } 66 } 67 } 68 //sorted.add("fil"); 69 System.out.println("Co.\tLocale Name\tStart\tEnd\tResult\tStart Sample\tEnd Sample\tStart Example\tEnd Example\tCombined Example"); 70 for (String locale : sorted) { 71 PluralInfo pluralInfo3 = SUPPLEMENTAL.getPlurals(locale); 72 if (locale.contains("_")) { 73 PluralInfo pluralInfo2 = SUPPLEMENTAL.getPlurals(ltp.set(locale).getLanguage()); 74 if (pluralInfo2.equals(pluralInfo3)) { 75 continue; 76 } 77 } 78 79 Set<Count> counts3 = pluralInfo3.getCounts(); 80 if (counts3.size() == 1) { 81 continue; // skip japanese, etc. 82 } 83 84 List<RangeSample> list = getRangeInfo(factory.make(locale, true)); 85 if (list == null) { 86 System.out.println("Failure with " + locale); 87 continue; 88 } 89 for (RangeSample rangeSample : list) { 90 System.out.println(locale + "\t" + english.getName(locale) 91 + "\t" + rangeSample.start 92 + "\t" + rangeSample.end 93 + "\t" + (rangeSample.result == null ? "missing" : rangeSample.result) 94 + "\t" + rangeSample.min 95 + "\t" + rangeSample.max 96 + "\t" + rangeSample.startExample 97 + "\t" + rangeSample.endExample 98 + "\t" + rangeSample.resultExample); 99 } 100 } 101 } 102 getRangeInfo(CLDRFile cldrFile)103 public List<RangeSample> getRangeInfo(CLDRFile cldrFile) { 104 String locale = cldrFile.getLocaleID(); 105 if (locale.equals("iw")) { 106 locale = "he"; 107 } 108 //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns(); 109 List<RangeSample> list = new ArrayList<RangeSample>(); 110 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 111 Set<Count> counts = pluralInfo.getCounts(); 112 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 113 if (pluralRanges == null && locale.contains("_")) { 114 String locale2 = new ULocale(locale).getLanguage(); 115 pluralRanges = SUPPLEMENTAL.getPluralRanges(locale2); 116 } 117 if (pluralRanges == null) { 118 return null; 119 } 120 ULocale ulocale = new ULocale(locale); 121 PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(ulocale.toString()); // CldrUtility.get(samples, ulocale); 122 // if (samplePatterns == null && locale.contains("_")) { 123 // ulocale = new ULocale(ulocale.getLanguage()); 124 // samplePatterns = CldrUtility.get(samples, ulocale); 125 // if (samplePatterns == null) { 126 // return null; 127 // } 128 // } 129 130 Output<FixedDecimal> maxSample = new Output<FixedDecimal>(); 131 Output<FixedDecimal> minSample = new Output<FixedDecimal>(); 132 133 ICUServiceBuilder icusb = new ICUServiceBuilder(); 134 icusb.setCldrFile(cldrFile); 135 DecimalFormat nf = icusb.getNumberFormat(1); 136 //String decimal = cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal"); 137 String defaultNumberingSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem"); 138 String range = cldrFile.getWinningValue("//ldml/numbers/miscPatterns[@numberSystem=\"" 139 + defaultNumberingSystem 140 + "\"]/pattern[@type=\"range\"]"); 141 142 // if (decimal == null) { 143 // throw new IllegalArgumentException(); 144 // } 145 for (Count s : counts) { 146 for (Count e : counts) { 147 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) { 148 continue; 149 } 150 Count r = pluralRanges.getExplicit(s, e); 151 String minFormatted = format(nf, minSample.value); 152 String maxFormatted = format(nf, maxSample.value); 153 String rangeFormatted = MessageFormat.format(range, minFormatted, maxFormatted); 154 155 list.add(new RangeSample( 156 s, e, r, 157 minSample.value, 158 maxSample.value, 159 getExample(locale, samplePatterns, s, minFormatted), getExample(locale, samplePatterns, e, maxFormatted), 160 getExample(locale, samplePatterns, r, rangeFormatted))); 161 } 162 } 163 return list; 164 } 165 166 public static class RangeSample { 167 // Category Examples Minimal Pairs Rules RangeSample(Count start, Count end, Count result, FixedDecimal min, FixedDecimal max, String startExample, String endExample, String resultExample)168 public RangeSample(Count start, Count end, Count result, 169 FixedDecimal min, FixedDecimal max, 170 String startExample, String endExample, String resultExample) { 171 this.start = start; 172 this.end = end; 173 this.result = result; 174 this.min = min; 175 this.max = max; 176 this.startExample = startExample; 177 this.endExample = endExample; 178 this.resultExample = resultExample; 179 } 180 181 final Count start; 182 final Count end; 183 final Count result; 184 final FixedDecimal min; 185 final FixedDecimal max; 186 final String startExample; 187 final String endExample; 188 final String resultExample; 189 } 190 format(DecimalFormat nf, FixedDecimal minSample)191 public static String format(DecimalFormat nf, FixedDecimal minSample) { 192 nf.setMinimumFractionDigits(minSample.getVisibleDecimalDigitCount()); 193 nf.setMaximumFractionDigits(minSample.getVisibleDecimalDigitCount()); 194 return nf.format(minSample); 195 } 196 197 // private String format(String decimal, Output<FixedDecimal> minSample) { 198 // return minSample.toString().replace(".", decimal); 199 // } 200 getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString)201 public static String getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString) { 202 if (r == null) { 203 return "«missing»"; 204 } 205 String samplePattern; 206 try { 207 samplePattern = samplePatterns.get(PluralRules.PluralType.CARDINAL, r); // CldrUtility.get(samplePatterns.keywordToPattern, r); 208 } catch (Exception e) { 209 throw new IllegalArgumentException("Locale: " + locale + "; Count: " + r, e); 210 } 211 return samplePattern 212 .replace('\u00A0', '\u0020') 213 .replace("{0}", numString); 214 } 215 216 private final SupplementalDataInfo SUPPLEMENTAL; 217 private final PluralRulesFactory prf; 218 219 public static final Comparator<Set<String>> STRING_SET_COMPARATOR = new SetComparator<String, Set<String>>(); 220 public static final Comparator<Set<Count>> COUNT_SET_COMPARATOR = new SetComparator<Count, Set<Count>>(); 221 222 static final class SetComparator<T extends Comparable<T>, U extends Set<T>> implements Comparator<U> { compare(U o1, U o2)223 public int compare(U o1, U o2) { 224 return CollectionUtilities.compare((Collection<T>) o1, (Collection<T>) o2); 225 } 226 }; 227 reformatPluralRanges()228 public void reformatPluralRanges() { 229 Map<Set<Count>, Relation<Set<String>, String>> seen = new TreeMap<Set<Count>, Relation<Set<String>, String>>(COUNT_SET_COMPARATOR); 230 231 for (String locale : SUPPLEMENTAL.getPluralRangesLocales()) { 232 233 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 234 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 235 Set<Count> counts = pluralInfo.getCounts(); 236 237 Set<String> s; 238 if (false) { 239 System.out.println("Minimized, but not ready for prime-time"); 240 s = minimize(pluralRanges, pluralInfo); 241 } else { 242 s = reformat(pluralRanges, counts); 243 } 244 Relation<Set<String>, String> item = seen.get(counts); 245 if (item == null) { 246 seen.put(counts, 247 item = Relation.of(new TreeMap<Set<String>, Set<String>>(STRING_SET_COMPARATOR), TreeSet.class)); 248 } 249 item.put(s, locale); 250 } 251 252 for (Entry<Set<Count>, Relation<Set<String>, String>> entry0 : seen.entrySet()) { 253 System.out.println("\n<!-- " + CollectionUtilities.join(entry0.getKey(), ", ") + " -->"); 254 for (Entry<Set<String>, Set<String>> entry : entry0.getValue().keyValuesSet()) { 255 System.out.println("\t\t<pluralRanges locales=\"" + CollectionUtilities.join(entry.getValue(), " ") + "\">"); 256 for (String line : entry.getKey()) { 257 System.out.println("\t\t\t" + line); 258 } 259 System.out.println("\t\t</pluralRanges>"); 260 } 261 } 262 } 263 264 enum RangeStrategy { 265 other, end, start, mixed 266 } 267 reformat(PluralRanges pluralRanges, Set<Count> counts)268 public Set<String> reformat(PluralRanges pluralRanges, Set<Count> counts) { 269 Set<String> s; 270 s = new LinkedHashSet<String>(); 271 // first determine the general principle 272 273 // EnumSet<RangeStrategy> strategy = EnumSet.allOf(RangeStrategy.class); 274 // Count firstResult = null; 275 // for (Count start : counts) { 276 // for (Count end : counts) { 277 // Count result = pluralRanges.getExplicit(start, end); 278 // if (result == null) { 279 // continue; 280 // } else if (firstResult == null) { 281 // firstResult = result; 282 // } 283 // if (result != start) { 284 // strategy.remove(RangeStrategy.start); 285 // } 286 // if (result != end) { 287 // strategy.remove(RangeStrategy.end); 288 // } 289 // if (result != Count.other) { 290 // strategy.remove(RangeStrategy.other); 291 // } 292 // } 293 // } 294 // s.add("<!-- Range Principle: " + strategy.iterator().next() + " -->"); 295 for (Count start : counts) { 296 for (Count end : counts) { 297 Count result = pluralRanges.getExplicit(start, end); 298 if (result == null) { 299 continue; 300 } 301 String line = PluralRanges.showRange(start, end, result); 302 s.add(line); 303 } 304 } 305 return s; 306 } 307 minimize(PluralRanges pluralRanges, PluralInfo pluralInfo)308 Set<String> minimize(PluralRanges pluralRanges, PluralInfo pluralInfo) { 309 Set<String> result = new LinkedHashSet<String>(); 310 // make it easier to manage 311 PluralRanges.Matrix matrix = new PluralRanges.Matrix(); 312 Output<FixedDecimal> maxSample = new Output<FixedDecimal>(); 313 Output<FixedDecimal> minSample = new Output<FixedDecimal>(); 314 for (Count s : Count.VALUES) { 315 for (Count e : Count.VALUES) { 316 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) { 317 continue; 318 } 319 Count r = pluralRanges.getExplicit(s, e); 320 matrix.set(s, e, r); 321 } 322 } 323 // if everything is 'other', we are done 324 // if (allOther == true) { 325 // return result; 326 // } 327 EnumSet<Count> endDone = EnumSet.noneOf(Count.class); 328 EnumSet<Count> startDone = EnumSet.noneOf(Count.class); 329 if (MINIMAL) { 330 for (Count end : pluralInfo.getCounts()) { 331 Count r = matrix.endSame(end); 332 if (r != null 333 //&& r != Count.other 334 ) { 335 result.add("<pluralRange" + 336 " \t\tend=\"" + end 337 + "\"\tresult=\"" + r + "\"/>"); 338 endDone.add(end); 339 } 340 } 341 Output<Boolean> emit = new Output<Boolean>(); 342 for (Count start : pluralInfo.getCounts()) { 343 Count r = matrix.startSame(start, endDone, emit); 344 if (r != null 345 // && r != Count.other 346 ) { 347 if (emit.value) { 348 result.add("<pluralRange" + 349 "\tstart=\"" + start 350 + "\" \t\tresult=\"" + r + "\"/>"); 351 } 352 startDone.add(start); 353 } 354 } 355 } 356 //Set<String> skip = new LinkedHashSet<String>(); 357 for (Count end : pluralInfo.getCounts()) { 358 if (endDone.contains(end)) { 359 continue; 360 } 361 for (Count start : pluralInfo.getCounts()) { 362 if (startDone.contains(start)) { 363 continue; 364 } 365 Count r = matrix.get(start, end); 366 if (r != null 367 //&& !(MINIMAL && r == Count.other) 368 ) { 369 result.add(PluralRanges.showRange(start, end, r)); 370 } else { 371 result.add("<!-- <pluralRange" + 372 "\tstart=\"" + start 373 + "\" \tend=\"" + end 374 + "\" \tresult=\"" + r + "\"/> -->"); 375 376 } 377 378 } 379 } 380 return result; 381 } 382 383 } 384