package org.unicode.cldr.tool;

import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrField;
import org.unicode.cldr.util.StandardCodes.LstrType;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSet.Builder;
import com.google.common.collect.Multimap;
import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;

/**
 * Chart that renders the language-group containment data as an HTML table.
 *
 * <p>Starting from the code {@code "mul"}, each group that has children produces up to two rows:
 * one listing the children that themselves have children ("tree nodes") and one listing the
 * children that do not ("leaf nodes"). Groups are then expanded recursively, depth first, in
 * English name order.
 */
public class ChartLanguageGroups extends Chart {

    // NOTE(review): all three markers are empty strings here, so nothing visible is emitted for
    // them in the "St." column or in the explanation text. They were presumably symbols that got
    // lost somewhere along the way -- confirm the intended values before relying on the chart.

    /** Marker placed before a leaf entry whose language code is in {@link #COLLECTIONS}. */
    private static final String SHOULD_NOT_BE_LEAF_NODE = "";
    /** Status marker for a row whose contained items have no children. */
    private static final String LEAF_NODES = "";
    /** Status marker for a row whose contained items have children of their own. */
    private static final String TREE_NODES = "";

    /**
     * Command-line entry point: writes this chart.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new ChartLanguageGroups().writeChart(null);
    }

    /** Language codes whose language-subtag-registry {@code Scope} field is "Collection". */
    static final Set<String> COLLECTIONS = loadCollections();

    /**
     * Scans the language entries of the subtag registry and collects every code whose Scope is
     * "Collection" (compared case-insensitively).
     *
     * @return an immutable set of the matching language codes
     */
    private static Set<String> loadCollections() {
        Map<String, Map<LstrField, String>> languages =
            StandardCodes.getEnumLstreg().get(LstrType.language);
        Builder<String> collections = ImmutableSet.<String> builder();
        for (Entry<String, Map<LstrField, String>> entry : languages.entrySet()) {
            // equalsIgnoreCase returns false for a null argument, so an absent Scope is skipped.
            if ("Collection".equalsIgnoreCase(entry.getValue().get(LstrField.Scope))) {
                collections.add(entry.getKey());
            }
        }
        return collections.build();
    }

    @Override
    public String getDirectory() {
        return FormattedFileWriter.CHART_TARGET_DIR;
    }

    @Override
    public String getTitle() {
        return "Language Groups";
    }

    /**
     * Returns the HTML blurb describing the chart: what the status markers mean, followed by
     * caveats about the completeness of the data.
     */
    @Override
    public String getExplanation() {
        return "<p>This chart shows draft language groups based on data extracted from wikidata. "
            + "The <b>Status</b> cell indicates the nature of the items in the adjacent <b>Contained</b> cell:</p>"
            + "<ul>\n"
            + "<li>A " + TREE_NODES
            + " indicates that the contained languages are tree nodes (contain other languages or language groups), "
            + "and will be listed further down in the chart in a <b>Language Group</b> cell.</li>\n"
            + "<li>A " + LEAF_NODES
            + " indicates that the contained languages are leaf nodes (contain nothing).</li>\n"
            + "<li>A " + SHOULD_NOT_BE_LEAF_NODE
            + " before an item <i>in</i> a <b>Contained</b> cell indicates a leaf node that shouldn’t be — that is, its ISO 639 Scope is "
            + "<a href='http://www-01.sil.org/iso639-3/scope.asp#C' target='_blank'>Collection</a>.</li>\n"
            + "</ul>\n"
            + "<p><b>Caveats:</b> Only the wikidata containment for "
            + "<a href='http://unicode.org/reports/tr35/#unicode_language_subtag'>valid language codes</a> is used. "
            + "The containment data is not complete: "
            + "if a language doesn't appear in the chart it could be an isolate, or just be missing data. "
            + "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.</p>\n";
    }

    /** Collator used to order language names by English rules. */
    final Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH);

    /**
     * Orders (name, code) pairs by name using {@link #ENGLISH_ORDER}, breaking ties by code so
     * that two languages with the same display name are both kept in a sorted set.
     * Built once per instance rather than on every recursive {@code show} call.
     */
    private final Comparator<Pair<String, String>> nameThenCode = new Comparator<Pair<String, String>>() {
        @Override
        public int compare(Pair<String, String> o1, Pair<String, String> o2) {
            int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst());
            if (diff != 0) {
                return diff;
            }
            return o1.getSecond().compareTo(o2.getSecond());
        }
    };

    /**
     * Builds the four-column table, fills it starting from the root code {@code "mul"}, and
     * writes the resulting table to {@code pw}.
     *
     * @param pw destination for the generated table
     * @throws IOException if writing to {@code pw} fails
     */
    @Override
    public void writeContents(FormattedFileWriter pw) throws IOException {
        Multimap<String, String> lg = CLDRConfig.getInstance().getSupplementalDataInfo().getLanguageGroups();

        TablePrinter tablePrinter = new TablePrinter()
            .addColumn("Language Group", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
            .setBreakSpans(true)
            .addColumn("Name", "class='source'", null, "class='source'", true)
            .addColumn("St.", "class='source'", null, "class='source'", true)
            .addColumn("Contained", "class='source'", null, "class='target'", true)
            .setBreakSpans(true);

        show(lg, "mul", tablePrinter);
        pw.write(tablePrinter.toTable());
    }

    /**
     * Adds the rows for {@code parent} and then recurses into each child that has children.
     *
     * @param lg containment data: group code to contained codes
     * @param parent code of the group to display
     * @param tablePrinter table receiving the rows
     */
    private void show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter) {
        Collection<String> children = lg.get(parent);
        if (children == null || children.isEmpty()) {
            return;
        }

        // Sort the children by display name (then code).
        Set<Pair<String, String>> sortedChildren = new TreeSet<>(nameThenCode);
        for (String lang : children) {
            sortedChildren.add(Pair.of(getLangName(lang), lang));
        }

        StringBuilder treeList = new StringBuilder();
        StringBuilder leafList = new StringBuilder();
        // Children to expand afterwards, kept in the sorted order in which they were met.
        Set<Pair<String, String>> childrenWithChildren = new LinkedHashSet<>();
        for (Pair<String, String> pair : sortedChildren) {
            String code = pair.getSecond();
            if (lg.containsKey(code)) {
                addChildren(treeList, pair, false);
                childrenWithChildren.add(pair);
            } else if (!code.equals("und")) {
                // The code "und" is left out of the leaf list.
                addChildren(leafList, pair, true);
            }
        }

        // Tree-node row first, then leaf-node row; an empty list produces no row.
        if (treeList.length() != 0) {
            addRow(parent, tablePrinter, TREE_NODES, treeList);
        }
        if (leafList.length() != 0) {
            addRow(parent, tablePrinter, LEAF_NODES, leafList);
        }

        for (Pair<String, String> pair : childrenWithChildren) {
            show(lg, pair.getSecond(), tablePrinter);
        }
    }

    /**
     * Appends one table row: group code, group name, status marker, and the contained list.
     *
     * @param parent code of the containing group
     * @param tablePrinter table receiving the row
     * @param marker status marker for the row
     * @param treeList already formatted list of contained items
     */
    private void addRow(String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList) {
        tablePrinter.addRow()
            .addCell(parent)
            .addCell(getLangName(parent))
            .addCell(marker)
            .addCell(treeList.toString())
            .finishRow();
    }

    /**
     * Appends the formatted entry for {@code pair} to {@code list}, separating entries with "; ".
     *
     * @param list accumulator for one "Contained" cell
     * @param pair (name, code) of the language to append
     * @param showCollections whether to flag codes that are in {@link #COLLECTIONS}
     */
    private void addChildren(StringBuilder list, Pair<String, String> pair, boolean showCollections) {
        if (list.length() != 0) {
            list.append("; ");
        }
        list.append(getPairName(pair, showCollections));
    }

    /**
     * Formats a (name, code) pair as {@code code “name”}, prefixed by the
     * should-not-be-a-leaf marker and a space when requested and applicable.
     *
     * @param pair (name, code) of the language
     * @param showCollection whether to add the marker for codes in {@link #COLLECTIONS}
     * @return the formatted entry
     */
    private String getPairName(Pair<String, String> pair, boolean showCollection) {
        String code = pair.getSecond();
        String prefix = showCollection && COLLECTIONS.contains(code)
            ? SHOULD_NOT_BE_LEAF_NODE + " "
            : "";
        return prefix + code + " “" + pair.getFirst() + "”";
    }

    /**
     * Returns the display name for a language code. {@code "mul"} and {@code "zh"} have fixed
     * names; otherwise the English name is used with " (Other)" and " languages" removed.
     *
     * @param langCode language code to name
     * @return the display name, or {@code langCode} itself when no English name is available
     */
    private String getLangName(String langCode) {
        if (langCode.equals("mul")) {
            return "All";
        }
        if (langCode.equals("zh")) {
            return "Mandarin Chinese";
        }
        // ENGLISH is not declared in this file (presumably inherited from Chart).
        String name = ENGLISH.getName(CLDRFile.LANGUAGE_NAME, langCode);
        if (name == null) {
            // NOTE(review): assumes getName can return null for a code without an English name --
            // confirm. Falling back to the code keeps the chart generation from aborting with a
            // NullPointerException here or in the name comparator.
            return langCode;
        }
        return name.replace(" (Other)", "").replace(" languages", "");
    }
}