• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.util.Collection;
4 import java.util.Map;
5 import java.util.Map.Entry;
6 import java.util.TreeMap;
7 
8 import org.unicode.cldr.util.Annotations;
9 import org.unicode.cldr.util.Annotations.AnnotationSet;
10 import org.unicode.cldr.util.Counter;
11 
12 import com.google.common.collect.Multimap;
13 import com.google.common.collect.TreeMultimap;
14 import com.ibm.icu.text.BreakIterator;
15 import com.ibm.icu.text.UnicodeSet;
16 import com.ibm.icu.util.ULocale;
17 
18 public class MakeAnnotationHistogram {
19     private static final Integer ZERO = (Integer) 0;
20 
main(String[] args)21     public static void main(String[] args) {
22         AnnotationSet english = Annotations.getDataSet("en");
23         UnicodeSet codes = english.getExplicitValues().keySet();
24         Multimap<String, ULocale> missingCodeToLocales = TreeMultimap.create();
25         Map<String, Counter<Integer>> codeToCounter = new TreeMap<>();
26         int maxmax = 0;
27         for (String locale : Annotations.getAvailable()) {
28             ULocale ulocale = new ULocale(locale);
29             AnnotationSet annotationSet = Annotations.getDataSet(locale);
30             Counter<Integer> counter = new Counter<>();
31 
32             int max = 0;
33             for (String code : codes) {
34                 String name = annotationSet.getShortName(code);
35                 if (name == null) {
36                     missingCodeToLocales.put(code, ulocale);
37                     continue;
38                 }
39                 int clusterCount = getCount(name, ulocale);
40                 counter.add(clusterCount, 1);
41                 max = Math.max(max, clusterCount);
42 
43                 Counter<Integer> counterForCode = codeToCounter.get(code);
44                 if (counterForCode == null) {
45                     codeToCounter.put(code, counterForCode = new Counter<>());
46                 }
47                 counterForCode.add(clusterCount, 1);
48             }
49             System.out.print(locale + "\t" + ulocale.getDisplayName());
50             for (int i = 1; i <= max; ++i) {
51                 System.out.print("\t" + emptyIfZero(counter.getCount(i)));
52             }
53             System.out.println();
54             if (maxmax < max) {
55                 maxmax = max;
56             }
57         }
58         System.out.println("Missing");
59         for (Entry<String, Collection<ULocale>> entry : missingCodeToLocales.asMap().entrySet()) {
60             System.out.println(entry.getKey() + "\t" + entry.getValue());
61         }
62         System.out.println("CodeToGCs");
63         for (Entry<String, Counter<Integer>> entry : codeToCounter.entrySet()) {
64             String code = entry.getKey();
65             Counter<Integer> counter = entry.getValue();
66             System.out.print(code);
67             for (int i = 1; i <= maxmax; ++i) {
68                 System.out.print("\t" + emptyIfZero(counter.getCount(i)));
69             }
70             System.out.println();
71         }
72     }
73 
emptyIfZero(long count)74     private static String emptyIfZero(long count) {
75         return count == 0 ? "" : String.valueOf(count);
76     }
77 
getCount(String name, ULocale locale)78     private static int getCount(String name, ULocale locale) {
79         BreakIterator boundary = BreakIterator.getCharacterInstance(locale);
80         int count = 0;
81         boundary.setText(name);
82 
83         int start = boundary.first();
84         for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
85             if (name.charAt(start) == ' ') {
86                 continue;
87             }
88             ++count;
89         }
90         return count;
91     }
92 }
93