• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.util.Collection;
5 import java.util.Comparator;
6 import java.util.LinkedHashSet;
7 import java.util.Map;
8 import java.util.Map.Entry;
9 import java.util.Set;
10 import java.util.TreeSet;
11 
12 import org.unicode.cldr.util.CLDRConfig;
13 import org.unicode.cldr.util.CLDRFile;
14 import org.unicode.cldr.util.CldrUtility;
15 import org.unicode.cldr.util.Pair;
16 import org.unicode.cldr.util.StandardCodes;
17 import org.unicode.cldr.util.StandardCodes.LstrField;
18 import org.unicode.cldr.util.StandardCodes.LstrType;
19 
20 import com.google.common.collect.ImmutableSet;
21 import com.google.common.collect.ImmutableSet.Builder;
22 import com.google.common.collect.Multimap;
23 import com.ibm.icu.text.Collator;
24 import com.ibm.icu.util.ULocale;
25 
26 public class ChartLanguageGroups extends Chart {
27 
28     private static final String SHOULD_NOT_BE_LEAF_NODE = "��";
29     private static final String LEAF_NODES = "��";
30     private static final String TREE_NODES = "��";
31 
main(String[] args)32     public static void main(String[] args) {
33         new ChartLanguageGroups().writeChart(null);
34     }
35 
36     static final Set<String> COLLECTIONS;
37     static {
38         Map<String, Map<LstrField, String>> languages = StandardCodes.getEnumLstreg().get(LstrType.language);
39         Builder<String> _collections = ImmutableSet.<String> builder();
40         for (Entry<String, Map<LstrField, String>> e : languages.entrySet()) {
41             String scope = e.getValue().get(LstrField.Scope);
42             if (scope != null
43                 && "Collection".equalsIgnoreCase(scope)) {
e.getKey()44                 _collections.add(e.getKey());
45             }
46         }
47         COLLECTIONS = _collections.build();
48     }
49 
50     @Override
getDirectory()51     public String getDirectory() {
52         return FormattedFileWriter.CHART_TARGET_DIR;
53     }
54 
55     @Override
getTitle()56     public String getTitle() {
57         return "Language Groups";
58     }
59 
60     @Override
getExplanation()61     public String getExplanation() {
62         return "<p>This chart shows draft language groups based on data extracted from wikidata. "
63             + "The <b>Status</b> cell indicates the nature of the items in the adjacent <b>Contained</b> cell:<p>"
64             + "<ul>\n"
65             + "<li>A " + TREE_NODES
66             + " indicates that the contained languages are tree nodes (contain other languages or langauge groups), "
67             + "and will be listed further down in the chart in a <b>Language Group</b> cell.</li>\n"
68             + "<li>A " + LEAF_NODES
69             + " indicates that the contained languages are leaf nodes (contain nothing).</li>\n"
70             + "<li>A " + SHOULD_NOT_BE_LEAF_NODE
71             + " before an item <i>in</i> a <b>Contained</b> cell indicates a leaf node that shouldn’t be — that is, its ISO 639 Scope is "
72             + "<a href='http://www-01.sil.org/iso639-3/scope.asp#C' target='_blank'>Collection</a>.</li>\n"
73             + "</ul>\n"
74             + "<p><b>Caveats:</b> Only the wikidata containment for "
75             + "<a href='http://unicode.org/reports/tr35/#unicode_language_subtag'>valid language codes</a> is used."
76             + "The containment data is not complete: "
77             + "if a language doesn't appear in the chart it could be an isolate, or just be missing data."
78             + "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.</p>\n";
79     }
80 
81     Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH);
82 
83     @Override
writeContents(FormattedFileWriter pw)84     public void writeContents(FormattedFileWriter pw) throws IOException {
85 
86         Multimap<String, String> lg = CLDRConfig.getInstance().getSupplementalDataInfo().getLanguageGroups();
87 
88         TablePrinter tablePrinter = new TablePrinter()
89             .addColumn("Language Group", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
90             .setBreakSpans(true)
91             .addColumn("Name", "class='source'", null, "class='source'", true)
92             .addColumn("St.", "class='source'", null, "class='source'", true)
93             .addColumn("Contained", "class='source'", null, "class='target'", true)
94             .setBreakSpans(true);
95 
96         show(lg, "mul", tablePrinter);
97         pw.write(tablePrinter.toTable());
98     }
99 
show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter)100     private void show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter) {
101         Collection<String> children = lg.get(parent);
102         if (children == null || children.isEmpty()) {
103             return;
104         }
105         TreeSet<Pair<String, String>> nameAndCode = new TreeSet<>(new Comparator<Pair<String, String>>() {
106             @Override
107             public int compare(Pair<String, String> o1, Pair<String, String> o2) {
108                 int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst());
109                 if (diff != 0) {
110                     return diff;
111                 }
112                 return o1.getSecond().compareTo(o2.getSecond());
113             }
114 
115         });
116         for (String lang : children) {
117             nameAndCode.add(Pair.of(getLangName(lang), lang));
118         }
119         StringBuilder treeList = new StringBuilder();
120         StringBuilder leafList = new StringBuilder();
121         LinkedHashSet<Pair<String, String>> nameAndCodeWithChildren = new LinkedHashSet<>();
122         for (Pair<String, String> pair : nameAndCode) {
123             String code = pair.getSecond();
124             if (lg.containsKey(code)) {
125                 addChildren(treeList, TREE_NODES, pair, false);
126                 nameAndCodeWithChildren.add(pair);
127             } else if (!code.equals("und")) {
128                 addChildren(leafList, LEAF_NODES, pair, true);
129             }
130         }
131         if (treeList.length() != 0) {
132             addRow(parent, tablePrinter, TREE_NODES, treeList);
133         }
134         if (leafList.length() != 0) {
135             addRow(parent, tablePrinter, LEAF_NODES, leafList);
136         }
137 
138         for (Pair<String, String> pair : nameAndCodeWithChildren) {
139             show(lg, pair.getSecond(), tablePrinter);
140         }
141     }
142 
addRow(String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList)143     private void addRow(String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList) {
144         tablePrinter.addRow()
145             .addCell(parent)
146             .addCell(getLangName(parent))
147             .addCell(marker)
148             .addCell(treeList.toString())
149             .finishRow();
150     }
151 
addChildren(StringBuilder treeList, String marker, Pair<String, String> pair, boolean showCollections)152     private void addChildren(StringBuilder treeList, String marker, Pair<String, String> pair, boolean showCollections) {
153         if (treeList.length() != 0) {
154             treeList.append("; ");
155         }
156         treeList.append(getPairName(pair, showCollections));
157     }
158 
getPairName(Pair<String, String> pair, boolean showCollection)159     private String getPairName(Pair<String, String> pair, boolean showCollection) {
160         return (showCollection && COLLECTIONS.contains(pair.getSecond())
161             ? SHOULD_NOT_BE_LEAF_NODE + " " : "")
162             + pair.getSecond() + " “" + pair.getFirst() + "”";
163     }
164 
getLangName(String langCode)165     private String getLangName(String langCode) {
166         return langCode.equals("mul") ? "All"
167             : langCode.equals("zh") ? "Mandarin Chinese"
168                 : ENGLISH.getName(CLDRFile.LANGUAGE_NAME, langCode).replace(" (Other)", "").replace(" languages", "");
169     }
170 }
171