1 package org.unicode.cldr.tool; 2 3 import java.util.Collection; 4 import java.util.Map; 5 import java.util.Map.Entry; 6 import java.util.TreeMap; 7 8 import org.unicode.cldr.util.Annotations; 9 import org.unicode.cldr.util.Annotations.AnnotationSet; 10 import org.unicode.cldr.util.Counter; 11 12 import com.google.common.collect.Multimap; 13 import com.google.common.collect.TreeMultimap; 14 import com.ibm.icu.text.BreakIterator; 15 import com.ibm.icu.text.UnicodeSet; 16 import com.ibm.icu.util.ULocale; 17 18 public class MakeAnnotationHistogram { 19 private static final Integer ZERO = (Integer) 0; 20 main(String[] args)21 public static void main(String[] args) { 22 AnnotationSet english = Annotations.getDataSet("en"); 23 UnicodeSet codes = english.getExplicitValues().keySet(); 24 Multimap<String, ULocale> missingCodeToLocales = TreeMultimap.create(); 25 Map<String, Counter<Integer>> codeToCounter = new TreeMap<>(); 26 int maxmax = 0; 27 for (String locale : Annotations.getAvailable()) { 28 ULocale ulocale = new ULocale(locale); 29 AnnotationSet annotationSet = Annotations.getDataSet(locale); 30 Counter<Integer> counter = new Counter<>(); 31 32 int max = 0; 33 for (String code : codes) { 34 String name = annotationSet.getShortName(code); 35 if (name == null) { 36 missingCodeToLocales.put(code, ulocale); 37 continue; 38 } 39 int clusterCount = getCount(name, ulocale); 40 counter.add(clusterCount, 1); 41 max = Math.max(max, clusterCount); 42 43 Counter<Integer> counterForCode = codeToCounter.get(code); 44 if (counterForCode == null) { 45 codeToCounter.put(code, counterForCode = new Counter<>()); 46 } 47 counterForCode.add(clusterCount, 1); 48 } 49 System.out.print(locale + "\t" + ulocale.getDisplayName()); 50 for (int i = 1; i <= max; ++i) { 51 System.out.print("\t" + emptyIfZero(counter.getCount(i))); 52 } 53 System.out.println(); 54 if (maxmax < max) { 55 maxmax = max; 56 } 57 } 58 System.out.println("Missing"); 59 for (Entry<String, Collection<ULocale>> entry : missingCodeToLocales.asMap().entrySet()) { 60 System.out.println(entry.getKey() + "\t" + entry.getValue()); 61 } 62 System.out.println("CodeToGCs"); 63 for (Entry<String, Counter<Integer>> entry : codeToCounter.entrySet()) { 64 String code = entry.getKey(); 65 Counter<Integer> counter = entry.getValue(); 66 System.out.print(code); 67 for (int i = 1; i <= maxmax; ++i) { 68 System.out.print("\t" + emptyIfZero(counter.getCount(i))); 69 } 70 System.out.println(); 71 } 72 } 73 emptyIfZero(long count)74 private static String emptyIfZero(long count) { 75 return count == 0 ? "" : String.valueOf(count); 76 } 77 getCount(String name, ULocale locale)78 private static int getCount(String name, ULocale locale) { 79 BreakIterator boundary = BreakIterator.getCharacterInstance(locale); 80 int count = 0; 81 boundary.setText(name); 82 83 int start = boundary.first(); 84 for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) { 85 if (name.charAt(start) == ' ') { 86 continue; 87 } 88 ++count; 89 } 90 return count; 91 } 92 } 93