• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.util.ArrayList;
4 import java.util.Collection;
5 import java.util.Comparator;
6 import java.util.EnumSet;
7 import java.util.LinkedHashSet;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 import java.util.TreeMap;
13 import java.util.TreeSet;
14 
15 import org.unicode.cldr.util.CLDRConfig;
16 import org.unicode.cldr.util.CLDRFile;
17 import org.unicode.cldr.util.Factory;
18 import org.unicode.cldr.util.ICUServiceBuilder;
19 import org.unicode.cldr.util.LanguageTagParser;
20 import org.unicode.cldr.util.Level;
21 import org.unicode.cldr.util.Organization;
22 import org.unicode.cldr.util.PluralRanges;
23 import org.unicode.cldr.util.StandardCodes;
24 import org.unicode.cldr.util.SupplementalDataInfo;
25 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
26 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
27 
28 import com.ibm.icu.dev.util.CollectionUtilities;
29 import com.ibm.icu.impl.Relation;
30 import com.ibm.icu.text.DecimalFormat;
31 import com.ibm.icu.text.MessageFormat;
32 import com.ibm.icu.text.PluralRules;
33 import com.ibm.icu.text.PluralRules.FixedDecimal;
34 import com.ibm.icu.util.Output;
35 import com.ibm.icu.util.ULocale;
36 
37 public class GeneratePluralRanges {
GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo)38     public GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo) {
39         SUPPLEMENTAL = supplementalDataInfo;
40         prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
41     }
42 
43     private static final boolean MINIMAL = true;
44 
main(String[] args)45     public static void main(String[] args) {
46         CLDRConfig testInfo = ToolConfig.getToolInstance();
47         GeneratePluralRanges me = new GeneratePluralRanges(testInfo.getSupplementalDataInfo());
48         me.reformatPluralRanges();
49         //me.generateSamples(testInfo.getEnglish(), testInfo.getCldrFactory());
50     }
51 
generateSamples(CLDRFile english, Factory factory)52     private void generateSamples(CLDRFile english, Factory factory) {
53         //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns();
54         // add all the items with plural ranges
55         Set<String> sorted = new TreeSet<String>(SUPPLEMENTAL.getPluralRangesLocales());
56         // add the core locales
57 //        sorted.addAll(StandardCodes.make().getLocaleCoverageLocales("google", EnumSet.of(Level.MODERN)));
58         sorted.addAll(StandardCodes.make().getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN)));
59         // add any variant plural forms
60         LanguageTagParser ltp = new LanguageTagParser();
61         for (String locale : SUPPLEMENTAL.getPluralLocales()) {
62             if (locale.contains("_")) {
63                 if (sorted.contains(ltp.set(locale).getLanguage())) {
64                     sorted.add(locale);
65                 }
66             }
67         }
68         //sorted.add("fil");
69         System.out.println("Co.\tLocale Name\tStart\tEnd\tResult\tStart Sample\tEnd Sample\tStart Example\tEnd Example\tCombined Example");
70         for (String locale : sorted) {
71             PluralInfo pluralInfo3 = SUPPLEMENTAL.getPlurals(locale);
72             if (locale.contains("_")) {
73                 PluralInfo pluralInfo2 = SUPPLEMENTAL.getPlurals(ltp.set(locale).getLanguage());
74                 if (pluralInfo2.equals(pluralInfo3)) {
75                     continue;
76                 }
77             }
78 
79             Set<Count> counts3 = pluralInfo3.getCounts();
80             if (counts3.size() == 1) {
81                 continue; // skip japanese, etc.
82             }
83 
84             List<RangeSample> list = getRangeInfo(factory.make(locale, true));
85             if (list == null) {
86                 System.out.println("Failure with " + locale);
87                 continue;
88             }
89             for (RangeSample rangeSample : list) {
90                 System.out.println(locale + "\t" + english.getName(locale)
91                     + "\t" + rangeSample.start
92                     + "\t" + rangeSample.end
93                     + "\t" + (rangeSample.result == null ? "missing" : rangeSample.result)
94                     + "\t" + rangeSample.min
95                     + "\t" + rangeSample.max
96                     + "\t" + rangeSample.startExample
97                     + "\t" + rangeSample.endExample
98                     + "\t" + rangeSample.resultExample);
99             }
100         }
101     }
102 
getRangeInfo(CLDRFile cldrFile)103     public List<RangeSample> getRangeInfo(CLDRFile cldrFile) {
104         String locale = cldrFile.getLocaleID();
105         if (locale.equals("iw")) {
106             locale = "he";
107         }
108         //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns();
109         List<RangeSample> list = new ArrayList<RangeSample>();
110         PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
111         Set<Count> counts = pluralInfo.getCounts();
112         PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
113         if (pluralRanges == null && locale.contains("_")) {
114             String locale2 = new ULocale(locale).getLanguage();
115             pluralRanges = SUPPLEMENTAL.getPluralRanges(locale2);
116         }
117         if (pluralRanges == null) {
118             return null;
119         }
120         ULocale ulocale = new ULocale(locale);
121         PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(ulocale.toString()); // CldrUtility.get(samples, ulocale);
122 //        if (samplePatterns == null && locale.contains("_")) {
123 //            ulocale = new ULocale(ulocale.getLanguage());
124 //            samplePatterns = CldrUtility.get(samples, ulocale);
125 //            if (samplePatterns == null) {
126 //                return null;
127 //            }
128 //        }
129 
130         Output<FixedDecimal> maxSample = new Output<FixedDecimal>();
131         Output<FixedDecimal> minSample = new Output<FixedDecimal>();
132 
133         ICUServiceBuilder icusb = new ICUServiceBuilder();
134         icusb.setCldrFile(cldrFile);
135         DecimalFormat nf = icusb.getNumberFormat(1);
136         //String decimal = cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal");
137         String defaultNumberingSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem");
138         String range = cldrFile.getWinningValue("//ldml/numbers/miscPatterns[@numberSystem=\""
139             + defaultNumberingSystem
140             + "\"]/pattern[@type=\"range\"]");
141 
142         //            if (decimal == null) {
143         //                throw new IllegalArgumentException();
144         //            }
145         for (Count s : counts) {
146             for (Count e : counts) {
147                 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) {
148                     continue;
149                 }
150                 Count r = pluralRanges.getExplicit(s, e);
151                 String minFormatted = format(nf, minSample.value);
152                 String maxFormatted = format(nf, maxSample.value);
153                 String rangeFormatted = MessageFormat.format(range, minFormatted, maxFormatted);
154 
155                 list.add(new RangeSample(
156                     s, e, r,
157                     minSample.value,
158                     maxSample.value,
159                     getExample(locale, samplePatterns, s, minFormatted), getExample(locale, samplePatterns, e, maxFormatted),
160                     getExample(locale, samplePatterns, r, rangeFormatted)));
161             }
162         }
163         return list;
164     }
165 
166     public static class RangeSample {
167         // Category Examples    Minimal Pairs   Rules
RangeSample(Count start, Count end, Count result, FixedDecimal min, FixedDecimal max, String startExample, String endExample, String resultExample)168         public RangeSample(Count start, Count end, Count result,
169             FixedDecimal min, FixedDecimal max,
170             String startExample, String endExample, String resultExample) {
171             this.start = start;
172             this.end = end;
173             this.result = result;
174             this.min = min;
175             this.max = max;
176             this.startExample = startExample;
177             this.endExample = endExample;
178             this.resultExample = resultExample;
179         }
180 
181         final Count start;
182         final Count end;
183         final Count result;
184         final FixedDecimal min;
185         final FixedDecimal max;
186         final String startExample;
187         final String endExample;
188         final String resultExample;
189     }
190 
format(DecimalFormat nf, FixedDecimal minSample)191     public static String format(DecimalFormat nf, FixedDecimal minSample) {
192         nf.setMinimumFractionDigits(minSample.getVisibleDecimalDigitCount());
193         nf.setMaximumFractionDigits(minSample.getVisibleDecimalDigitCount());
194         return nf.format(minSample);
195     }
196 
197     //    private String format(String decimal, Output<FixedDecimal> minSample) {
198     //        return minSample.toString().replace(".", decimal);
199     //    }
200 
getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString)201     public static String getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString) {
202         if (r == null) {
203             return "«missing»";
204         }
205         String samplePattern;
206         try {
207             samplePattern = samplePatterns.get(PluralRules.PluralType.CARDINAL, r); // CldrUtility.get(samplePatterns.keywordToPattern, r);
208         } catch (Exception e) {
209             throw new IllegalArgumentException("Locale: " + locale + "; Count: " + r, e);
210         }
211         return samplePattern
212             .replace('\u00A0', '\u0020')
213             .replace("{0}", numString);
214     }
215 
216     private final SupplementalDataInfo SUPPLEMENTAL;
217     private final PluralRulesFactory prf;
218 
219     public static final Comparator<Set<String>> STRING_SET_COMPARATOR = new SetComparator<String, Set<String>>();
220     public static final Comparator<Set<Count>> COUNT_SET_COMPARATOR = new SetComparator<Count, Set<Count>>();
221 
222     static final class SetComparator<T extends Comparable<T>, U extends Set<T>> implements Comparator<U> {
compare(U o1, U o2)223         public int compare(U o1, U o2) {
224             return CollectionUtilities.compare((Collection<T>) o1, (Collection<T>) o2);
225         }
226     };
227 
reformatPluralRanges()228     public void reformatPluralRanges() {
229         Map<Set<Count>, Relation<Set<String>, String>> seen = new TreeMap<Set<Count>, Relation<Set<String>, String>>(COUNT_SET_COMPARATOR);
230 
231         for (String locale : SUPPLEMENTAL.getPluralRangesLocales()) {
232 
233             PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
234             PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
235             Set<Count> counts = pluralInfo.getCounts();
236 
237             Set<String> s;
238             if (false) {
239                 System.out.println("Minimized, but not ready for prime-time");
240                 s = minimize(pluralRanges, pluralInfo);
241             } else {
242                 s = reformat(pluralRanges, counts);
243             }
244             Relation<Set<String>, String> item = seen.get(counts);
245             if (item == null) {
246                 seen.put(counts,
247                     item = Relation.of(new TreeMap<Set<String>, Set<String>>(STRING_SET_COMPARATOR), TreeSet.class));
248             }
249             item.put(s, locale);
250         }
251 
252         for (Entry<Set<Count>, Relation<Set<String>, String>> entry0 : seen.entrySet()) {
253             System.out.println("\n<!-- " + CollectionUtilities.join(entry0.getKey(), ", ") + " -->");
254             for (Entry<Set<String>, Set<String>> entry : entry0.getValue().keyValuesSet()) {
255                 System.out.println("\t\t<pluralRanges locales=\"" + CollectionUtilities.join(entry.getValue(), " ") + "\">");
256                 for (String line : entry.getKey()) {
257                     System.out.println("\t\t\t" + line);
258                 }
259                 System.out.println("\t\t</pluralRanges>");
260             }
261         }
262     }
263 
264     enum RangeStrategy {
265         other, end, start, mixed
266     }
267 
reformat(PluralRanges pluralRanges, Set<Count> counts)268     public Set<String> reformat(PluralRanges pluralRanges, Set<Count> counts) {
269         Set<String> s;
270         s = new LinkedHashSet<String>();
271         // first determine the general principle
272 
273         //        EnumSet<RangeStrategy> strategy = EnumSet.allOf(RangeStrategy.class);
274         //        Count firstResult = null;
275         //        for (Count start : counts) {
276         //            for (Count end : counts) {
277         //                Count result = pluralRanges.getExplicit(start, end);
278         //                if (result == null) {
279         //                    continue;
280         //                } else if (firstResult == null) {
281         //                    firstResult = result;
282         //                }
283         //                if (result != start) {
284         //                    strategy.remove(RangeStrategy.start);
285         //                }
286         //                if (result != end) {
287         //                    strategy.remove(RangeStrategy.end);
288         //                }
289         //                if (result != Count.other) {
290         //                    strategy.remove(RangeStrategy.other);
291         //                }
292         //           }
293         //        }
294         //        s.add("<!-- Range Principle: " + strategy.iterator().next() + " -->");
295         for (Count start : counts) {
296             for (Count end : counts) {
297                 Count result = pluralRanges.getExplicit(start, end);
298                 if (result == null) {
299                     continue;
300                 }
301                 String line = PluralRanges.showRange(start, end, result);
302                 s.add(line);
303             }
304         }
305         return s;
306     }
307 
minimize(PluralRanges pluralRanges, PluralInfo pluralInfo)308     Set<String> minimize(PluralRanges pluralRanges, PluralInfo pluralInfo) {
309         Set<String> result = new LinkedHashSet<String>();
310         // make it easier to manage
311         PluralRanges.Matrix matrix = new PluralRanges.Matrix();
312         Output<FixedDecimal> maxSample = new Output<FixedDecimal>();
313         Output<FixedDecimal> minSample = new Output<FixedDecimal>();
314         for (Count s : Count.VALUES) {
315             for (Count e : Count.VALUES) {
316                 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) {
317                     continue;
318                 }
319                 Count r = pluralRanges.getExplicit(s, e);
320                 matrix.set(s, e, r);
321             }
322         }
323         // if everything is 'other', we are done
324         //        if (allOther == true) {
325         //            return result;
326         //        }
327         EnumSet<Count> endDone = EnumSet.noneOf(Count.class);
328         EnumSet<Count> startDone = EnumSet.noneOf(Count.class);
329         if (MINIMAL) {
330             for (Count end : pluralInfo.getCounts()) {
331                 Count r = matrix.endSame(end);
332                 if (r != null
333                 //&& r != Count.other
334                 ) {
335                     result.add("<pluralRange" +
336                         "              \t\tend=\"" + end
337                         + "\"\tresult=\"" + r + "\"/>");
338                     endDone.add(end);
339                 }
340             }
341             Output<Boolean> emit = new Output<Boolean>();
342             for (Count start : pluralInfo.getCounts()) {
343                 Count r = matrix.startSame(start, endDone, emit);
344                 if (r != null
345                 // && r != Count.other
346                 ) {
347                     if (emit.value) {
348                         result.add("<pluralRange" +
349                             "\tstart=\"" + start
350                             + "\"          \t\tresult=\"" + r + "\"/>");
351                     }
352                     startDone.add(start);
353                 }
354             }
355         }
356         //Set<String> skip = new LinkedHashSet<String>();
357         for (Count end : pluralInfo.getCounts()) {
358             if (endDone.contains(end)) {
359                 continue;
360             }
361             for (Count start : pluralInfo.getCounts()) {
362                 if (startDone.contains(start)) {
363                     continue;
364                 }
365                 Count r = matrix.get(start, end);
366                 if (r != null
367                 //&& !(MINIMAL && r == Count.other)
368                 ) {
369                     result.add(PluralRanges.showRange(start, end, r));
370                 } else {
371                     result.add("<!-- <pluralRange" +
372                         "\tstart=\"" + start
373                         + "\" \tend=\"" + end
374                         + "\" \tresult=\"" + r + "\"/> -->");
375 
376                 }
377 
378             }
379         }
380         return result;
381     }
382 
383 }
384