package org.unicode.cldr.tool; import java.io.IOException; import java.util.Collection; import java.util.Comparator; import java.util.LinkedHashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.Pair; import org.unicode.cldr.util.StandardCodes; import org.unicode.cldr.util.StandardCodes.LstrField; import org.unicode.cldr.util.StandardCodes.LstrType; import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet.Builder; import com.google.common.collect.Multimap; import com.ibm.icu.text.Collator; import com.ibm.icu.util.ULocale; public class ChartLanguageGroups extends Chart { private static final String SHOULD_NOT_BE_LEAF_NODE = "🍂"; private static final String LEAF_NODES = "🍃"; private static final String TREE_NODES = "🌲"; public static void main(String[] args) { new ChartLanguageGroups().writeChart(null); } static final Set COLLECTIONS; static { Map> languages = StandardCodes.getEnumLstreg().get(LstrType.language); Builder _collections = ImmutableSet. builder(); for (Entry> e : languages.entrySet()) { String scope = e.getValue().get(LstrField.Scope); if (scope != null && "Collection".equalsIgnoreCase(scope)) { _collections.add(e.getKey()); } } COLLECTIONS = _collections.build(); } @Override public String getDirectory() { return FormattedFileWriter.CHART_TARGET_DIR; } @Override public String getTitle() { return "Language Groups"; } @Override public String getExplanation() { return "

This chart shows draft language groups based on data extracted from wikidata. " + "The Status cell indicates the nature of the items in the adjacent Contained cell:

" + "

    \n" + "
  • A " + TREE_NODES + " indicates that the contained languages are tree nodes (contain other languages or langauge groups), " + "and will be listed further down in the chart in a Language Group cell.
  • \n" + "
  • A " + LEAF_NODES + " indicates that the contained languages are leaf nodes (contain nothing).
  • \n" + "
  • A " + SHOULD_NOT_BE_LEAF_NODE + " before an item in a Contained cell indicates a leaf node that shouldn’t be — that is, its ISO 639 Scope is " + "Collection.
  • \n" + "
\n" + "

Caveats: Only the wikidata containment for " + "valid language codes is used." + "The containment data is not complete: " + "if a language doesn't appear in the chart it could be an isolate, or just be missing data." + "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.

\n"; } Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH); @Override public void writeContents(FormattedFileWriter pw) throws IOException { Multimap lg = CLDRConfig.getInstance().getSupplementalDataInfo().getLanguageGroups(); TablePrinter tablePrinter = new TablePrinter() .addColumn("Language Group", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) .setBreakSpans(true) .addColumn("Name", "class='source'", null, "class='source'", true) .addColumn("St.", "class='source'", null, "class='source'", true) .addColumn("Contained", "class='source'", null, "class='target'", true) .setBreakSpans(true); show(lg, "mul", tablePrinter); pw.write(tablePrinter.toTable()); } private void show(Multimap lg, String parent, TablePrinter tablePrinter) { Collection children = lg.get(parent); if (children == null || children.isEmpty()) { return; } TreeSet> nameAndCode = new TreeSet<>(new Comparator>() { @Override public int compare(Pair o1, Pair o2) { int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst()); if (diff != 0) { return diff; } return o1.getSecond().compareTo(o2.getSecond()); } }); for (String lang : children) { nameAndCode.add(Pair.of(getLangName(lang), lang)); } StringBuilder treeList = new StringBuilder(); StringBuilder leafList = new StringBuilder(); LinkedHashSet> nameAndCodeWithChildren = new LinkedHashSet<>(); for (Pair pair : nameAndCode) { String code = pair.getSecond(); if (lg.containsKey(code)) { addChildren(treeList, TREE_NODES, pair, false); nameAndCodeWithChildren.add(pair); } else if (!code.equals("und")) { addChildren(leafList, LEAF_NODES, pair, true); } } if (treeList.length() != 0) { addRow(parent, tablePrinter, TREE_NODES, treeList); } if (leafList.length() != 0) { addRow(parent, tablePrinter, LEAF_NODES, leafList); } for (Pair pair : nameAndCodeWithChildren) { show(lg, pair.getSecond(), tablePrinter); } } private void addRow(String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList) { tablePrinter.addRow() .addCell(parent) .addCell(getLangName(parent)) .addCell(marker) .addCell(treeList.toString()) .finishRow(); } private void addChildren(StringBuilder treeList, String marker, Pair pair, boolean showCollections) { if (treeList.length() != 0) { treeList.append("; "); } treeList.append(getPairName(pair, showCollections)); } private String getPairName(Pair pair, boolean showCollection) { return (showCollection && COLLECTIONS.contains(pair.getSecond()) ? SHOULD_NOT_BE_LEAF_NODE + " " : "") + pair.getSecond() + " “" + pair.getFirst() + "”"; } private String getLangName(String langCode) { return langCode.equals("mul") ? "All" : langCode.equals("zh") ? "Mandarin Chinese" : ENGLISH.getName(CLDRFile.LANGUAGE_NAME, langCode).replace(" (Other)", "").replace(" languages", ""); } }