• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.Multimap;
5 import com.google.common.collect.TreeMultimap;
6 import com.ibm.icu.impl.Relation;
7 import com.ibm.icu.impl.Row;
8 import com.ibm.icu.impl.Row.R3;
9 import com.ibm.icu.impl.Utility;
10 import com.ibm.icu.text.RuleBasedCollator;
11 import com.ibm.icu.text.UnicodeSet;
12 import com.ibm.icu.util.ULocale;
13 import java.io.IOException;
14 import java.util.Arrays;
15 import java.util.Collection;
16 import java.util.EnumMap;
17 import java.util.LinkedHashMap;
18 import java.util.Map;
19 import java.util.Map.Entry;
20 import java.util.Set;
21 import java.util.TreeSet;
22 import org.unicode.cldr.draft.FileUtilities;
23 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
24 import org.unicode.cldr.util.Annotations;
25 import org.unicode.cldr.util.Annotations.AnnotationSet;
26 import org.unicode.cldr.util.CLDRFile;
27 import org.unicode.cldr.util.CLDRPaths;
28 import org.unicode.cldr.util.CLDRURLS;
29 import org.unicode.cldr.util.CldrUtility;
30 import org.unicode.cldr.util.Factory;
31 import org.unicode.cldr.util.FileCopier;
32 import org.unicode.cldr.util.LanguageGroup;
33 import org.unicode.cldr.util.LanguageTagParser;
34 import org.unicode.cldr.util.LocaleIDParser;
35 
36 public class ChartAnnotations extends Chart {
37 
38     private static final String LDML_ANNOTATIONS =
39             "<a href='https://unicode.org/reports/tr35/tr35-general.html#Annotations'>LDML Annotations</a>";
40 
41     private static final String MAIN_HEADER =
42             "<p>Annotations provide names and keywords for Unicode characters, currently focusing on emoji. "
43                     + "If you see any problems, please <a target='_blank' href='"
44                     + CLDRURLS.CLDR_NEWTICKET_URL
45                     + "'>file a ticket</a> with the corrected values for the locale. "
46                     + "For the XML data used for these charts, see "
47                     + "<a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>latest-release annotations </a> "
48                     + "or <a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>beta annotations</a>. "
49                     + "For more information, see "
50                     + LDML_ANNOTATIONS
51                     + ".</p>";
52     private static final boolean DEBUG = false;
53     private static final String DIR = CLDRPaths.CHART_DIRECTORY + "annotations/";
54 
main(String[] args)55     public static void main(String[] args) {
56         new ChartAnnotations().writeChart(null);
57     }
58 
59     @Override
getDirectory()60     public String getDirectory() {
61         return DIR;
62     }
63 
64     @Override
getTitle()65     public String getTitle() {
66         return "Annotation Charts";
67     }
68 
69     @Override
getFileName()70     public String getFileName() {
71         return "index";
72     }
73 
74     @Override
getExplanation()75     public String getExplanation() {
76         return MAIN_HEADER
77                 + "<p>The charts are presented in groups of related languages, for easier comparison.<p>";
78     }
79 
80     @Override
writeContents(FormattedFileWriter pw)81     public void writeContents(FormattedFileWriter pw) throws IOException {
82         FileCopier.ensureDirectoryExists(DIR);
83         FileCopier.copy(Chart.class, "index.css", DIR);
84         FormattedFileWriter.copyIncludeHtmls(DIR);
85 
86         FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors();
87         writeSubcharts(anchors);
88         pw.setIndex("Main Chart Index", "../index.html");
89         pw.write(anchors.toString());
90     }
91 
92     static final UnicodeSet EXTRAS =
93             new UnicodeSet()
94                     .addAll(
95                             Arrays.asList(
96                                     "����",
97                                     "��",
98                                     "#️⃣",
99                                     "����",
100                                     "��‍❤️‍��‍��",
101                                     "��‍❤️‍��",
102                                     "��‍��‍��",
103                                     "����‍⚕️",
104                                     "����‍♂️",
105                                     "����‍♀️",
106                                     "��‍❤️‍��‍��",
107                                     "����‍♀️",
108                                     "��",
109                                     "��‍❤️‍��‍��",
110                                     "��",
111                                     "��‍❤️‍��",
112                                     "��",
113                                     "��‍��‍��",
114                                     "����",
115                                     "����",
116                                     "��‍⚖",
117                                     "����‍⚖",
118                                     "��‍⚖",
119                                     "����‍⚖",
120                                     "��",
121                                     "��‍♂️",
122                                     "����‍♂️",
123                                     "��‍♀️",
124                                     "����‍♀️",
125                                     "��",
126                                     "����",
127                                     "��‍♂️",
128                                     "����‍♂️",
129                                     "��‍♀️",
130                                     "����‍♀️",
131                                     "��������������",
132                                     "#️⃣",
133                                     "����",
134                                     "⛹️‍♀️",
135                                     "��‍⚕️",
136                                     "��️‍��",
137                                     "��‍☠️",
138                                     "��‍��",
139                                     "����‍��",
140                                     "��",
141                                     "��"))
142                     .freeze();
143 
writeSubcharts(Anchors anchors)144     public void writeSubcharts(Anchors anchors) throws IOException {
145         Set<String> locales = Annotations.getAvailableLocales();
146 
147         AnnotationSet english = Annotations.getDataSet("en");
148         UnicodeSet s = new UnicodeSet(english.keySet()).addAll(EXTRAS).freeze();
149 
150         // set up right order for columns
151 
152         Map<String, String> nameToCode = new LinkedHashMap<>();
153         Relation<LanguageGroup, R3<Integer, String, String>> groupToNameAndCodeSorted =
154                 Relation.of(
155                         new EnumMap<LanguageGroup, Set<R3<Integer, String, String>>>(
156                                 LanguageGroup.class),
157                         TreeSet.class);
158 
159         Multimap<String, String> localeToSub = TreeMultimap.create();
160         LanguageTagParser ltp = new LanguageTagParser();
161 
162         for (String locale : locales) {
163             ltp.set(locale);
164             if (locale.equals("root")) {
165                 continue;
166             }
167             if (locale.equals("en")) { // make first
168                 continue;
169             }
170             String region = ltp.getRegion();
171             if (!region.isEmpty()) {
172                 localeToSub.put(ltp.getLanguageScript(), locale);
173                 continue;
174             }
175 
176             if (locale.startsWith("en")) {
177                 int debug = 0;
178             }
179             String name = ENGLISH.getName(locale, true);
180             int baseEnd = locale.indexOf('_');
181             ULocale loc = new ULocale(baseEnd < 0 ? locale : locale.substring(0, baseEnd));
182             LanguageGroup group = LanguageGroup.get(loc);
183             int rank = LanguageGroup.rankInGroup(loc);
184             groupToNameAndCodeSorted.put(group, Row.of(rank, name, locale));
185         }
186 
187         for (Entry<LanguageGroup, Set<R3<Integer, String, String>>> groupPairs :
188                 groupToNameAndCodeSorted.keyValuesSet()) {
189             LanguageGroup group = groupPairs.getKey();
190             String ename = ENGLISH.getName("en", true);
191             nameToCode.clear();
192             nameToCode.put(ename, "en"); // always have english first
193 
194             // add English variants if they exist
195 
196             for (R3<Integer, String, String> pair : groupPairs.getValue()) {
197                 String name = pair.get1();
198                 String locale = pair.get2();
199                 if (locale.startsWith("en_")) {
200                     nameToCode.put(name, locale);
201                 }
202             }
203 
204             for (R3<Integer, String, String> pair : groupPairs.getValue()) {
205                 String name = pair.get1();
206                 String locale = pair.get2();
207 
208                 nameToCode.put(name, locale);
209                 System.out.println(pair);
210             }
211             // now build table with right order for columns
212             double width = ((int) ((99.0 / (locales.size() + 1)) * 1000)) / 1000.0;
213             // String widthString = "class='source' width='"+ width + "%'";
214             String widthStringTarget = "class='target' width='" + width + "%'";
215 
216             TablePrinter tablePrinter =
217                     new TablePrinter()
218                             .addColumn(
219                                     "Char",
220                                     "class='source' width='1%'",
221                                     CldrUtility.getDoubleLinkMsg(),
222                                     "class='source-image'",
223                                     true)
224                             .addColumn(
225                                     "Hex",
226                                     "class='source' width='1%'",
227                                     null,
228                                     "class='source'",
229                                     true)
230                     // .addColumn("Formal Name", "class='source' width='" + width + "%'", null,
231                     // "class='source'", true)
232                     ;
233 
234             for (Entry<String, String> entry : nameToCode.entrySet()) {
235                 String name = entry.getKey();
236                 tablePrinter.addColumn(name, widthStringTarget, null, "class='target'", true);
237             }
238             // sort the characters
239             Set<String> sorted = new TreeSet<>(RBC);
240             Multimap<String, String> valueToSub = TreeMultimap.create();
241 
242             for (String cp : s.addAllTo(sorted)) {
243                 tablePrinter.addRow().addCell(cp).addCell(Utility.hex(cp, 4, " "))
244                 // .addCell(getName(cp))
245                 ;
246                 for (Entry<String, String> nameAndLocale : nameToCode.entrySet()) {
247                     String name = nameAndLocale.getKey();
248                     String locale = nameAndLocale.getValue();
249 
250                     AnnotationSet annotations = Annotations.getDataSet(locale);
251                     AnnotationSet parentAnnotations =
252                             Annotations.getDataSet(LocaleIDParser.getParent(locale));
253                     String baseAnnotation = annotations.toString(cp, true, parentAnnotations);
254                     String baseAnnotationOriginal = baseAnnotation;
255 
256                     if (DEBUG)
257                         System.out.println(name + ":" + annotations.toString(cp, false, null));
258                     Collection<String> subs = localeToSub.get(locale);
259                     if (!subs.isEmpty()) {
260                         valueToSub.clear();
261                         for (String sub : subs) {
262                             AnnotationSet subAnnotations = Annotations.getDataSet(sub);
263                             AnnotationSet subParentAnnotations =
264                                     Annotations.getDataSet(LocaleIDParser.getParent(locale));
265                             String baseAnnotation2 =
266                                     subAnnotations.toString(cp, true, subParentAnnotations);
267                             if (!baseAnnotation2.equals(baseAnnotationOriginal)) {
268                                 valueToSub.put(baseAnnotation2, sub);
269                             }
270                         }
271                         for (Entry<String, Collection<String>> entry :
272                                 valueToSub.asMap().entrySet()) {
273                             baseAnnotation +=
274                                     "<hr><i>"
275                                             + Joiner.on(", ").join(entry.getValue())
276                                             + "</i>: "
277                                             + entry.getKey();
278                         }
279                     }
280                     tablePrinter.addCell(baseAnnotation);
281                 }
282                 tablePrinter.finishRow();
283             }
284             final String name = group.toString();
285             new Subchart(name + " Annotations", FileUtilities.anchorize(name), tablePrinter)
286                     .writeChart(anchors);
287         }
288     }
289 
290     static final int FIRST_REGIONAL = 0x1F1E6;
291     static final int LAST_REGIONAL = 0x1F1FF;
292 
getRegionalIndicator(int firstCodepoint)293     public static int getRegionalIndicator(int firstCodepoint) {
294         return FIRST_REGIONAL <= firstCodepoint && firstCodepoint <= LAST_REGIONAL
295                 ? firstCodepoint - FIRST_REGIONAL + 'A'
296                 : -1;
297     }
298 
299     //    private String getName(String cp) {
300     //        int ri1 = getRegionalIndicator(cp.codePointAt(0));
301     //        if (ri1 >= 0) {
302     //            int ri2 = getRegionalIndicator(cp.codePointAt(2));
303     //            return ENGLISH.getName(CLDRFile.TERRITORY_NAME, String.valueOf((char) ri1) +
304     // String.valueOf((char) ri2));
305     //        }
306     //        String result = NAMES80.get(cp);
307     //        return result != null ? result : UCharacter.getName(cp, ", ");
308     //    }
309     //
310     //    private static UnicodeMap<String> NAMES80 = new UnicodeMap<>();
311     //    static {
312     //        String[][] data = {
313     //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-1-2" },
314     //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-3" },
315     //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-4" },
316     //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-5" },
317     //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-6" },
318     //            { "��", "ZIPPER-MOUTH FACE" },
319     //            { "��", "MONEY-MOUTH FACE" },
320     //            { "��", "FACE WITH THERMOMETER" },
321     //            { "��", "NERD FACE" },
322     //            { "��", "THINKING FACE" },
323     //            { "��", "FACE WITH ROLLING EYES" },
324     //            { "��", "UPSIDE-DOWN FACE" },
325     //            { "��", "FACE WITH HEAD-BANDAGE" },
326     //            { "��", "ROBOT FACE" },
327     //            { "��", "HUGGING FACE" },
328     //            { "��", "SIGN OF THE HORNS" },
329     //            { "��", "CRAB (also Cancer)" },
330     //            { "��", "SCORPION (also Scorpio)" },
331     //            { "��", "LION FACE (also Leo)" },
332     //            { "��", "BOW AND ARROW (also Sagittarius)" },
333     //            { "��", "AMPHORA (also Aquarius)" },
334     //            { "��", "PLACE OF WORSHIP" },
335     //            { "��", "KAABA" },
336     //            { "��", "MOSQUE" },
337     //            { "��", "SYNAGOGUE" },
338     //            { "��", "MENORAH WITH NINE BRANCHES" },
339     //            { "��", "PRAYER BEADS" },
340     //            { "��", "HOT DOG" },
341     //            { "��", "TACO" },
342     //            { "��", "BURRITO" },
343     //            { "��", "CHEESE WEDGE" },
344     //            { "��", "POPCORN" },
345     //            { "��", "BOTTLE WITH POPPING CORK" },
346     //            { "��", "TURKEY" },
347     //            { "��", "UNICORN FACE" },
348     //            { "��", "CRICKET BAT AND BALL" },
349     //            { "��", "VOLLEYBALL" },
350     //            { "��", "FIELD HOCKEY STICK AND BALL" },
351     //            { "��", "ICE HOCKEY STICK AND PUCK" },
352     //            { "��", "TABLE TENNIS PADDLE AND BALL" },
353     //            { "��", "BADMINTON RACQUET AND SHUTTLECOCK" } };
354     //        for (String[] pair : data) {
355     //            NAMES80.put(pair[0], pair[1]);
356     //        }
357     //        NAMES80.freeze();
358     //    }
359 
360     private class Subchart extends Chart {
361         String title;
362         String file;
363         private TablePrinter tablePrinter;
364 
365         @Override
getShowDate()366         public boolean getShowDate() {
367             return false;
368         }
369 
Subchart(String title, String file, TablePrinter tablePrinter)370         public Subchart(String title, String file, TablePrinter tablePrinter) {
371             super();
372             this.title = title;
373             this.file = file;
374             this.tablePrinter = tablePrinter;
375         }
376 
377         @Override
getDirectory()378         public String getDirectory() {
379             return DIR;
380         }
381 
382         @Override
getTitle()383         public String getTitle() {
384             return title;
385         }
386 
387         @Override
getFileName()388         public String getFileName() {
389             return file;
390         }
391 
392         @Override
getExplanation()393         public String getExplanation() {
394             return MAIN_HEADER
395                     + "<p>This table shows the annotations for a group of related languages (plus English) for easier comparison. "
396                     + "The first item is the <b>short name</b> (also the text-to-speech phrase). "
397                     + "It is bolded for clarity, and marked with a * for searching on this page. "
398                     + "The remaining phrases are <b>keywords</b> (labels), separated by “|”. "
399                     + "The keywords plus the words in the short name are typically used for search and predictive typing.<p>\n"
400                     + "<p>Most short names and keywords that can be constructed with the mechanism in "
401                     + LDML_ANNOTATIONS
402                     + " are omitted. "
403                     + "However, a few are included for comparison: "
404                     + Joiner.on(", ").join(EXTRAS.addAllTo(new TreeSet<>()))
405                     + ". "
406                     + "In this chart, missing items are marked with “"
407                     + Annotations.MISSING_MARKER
408                     + "”, "
409                     + "‘fallback’ constructed items with “"
410                     + Annotations.BAD_MARKER
411                     + "”, "
412                     + "substituted English values with “"
413                     + Annotations.ENGLISH_MARKER
414                     + "”, and "
415                     + "values equal to their parent locale’s values are replaced with "
416                     + Annotations.EQUIVALENT
417                     + ".</p>\n";
418         }
419 
420         @Override
writeContents(FormattedFileWriter pw)421         public void writeContents(FormattedFileWriter pw) throws IOException {
422             pw.write(tablePrinter.toTable());
423         }
424     }
425 
426     public static RuleBasedCollator RBC;
427 
428     static {
429         Factory cldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "collation/", ".*");
430         CLDRFile root = cldrFactory.make("root", false);
431         String rules =
432                 root.getStringValue(
433                         "//ldml/collations/collation[@type=\"emoji\"][@visibility=\"external\"]/cr");
434 
435         //        if (!rules.contains("'#⃣'")) {
436         //            rules = rules.replace("#⃣", "'#⃣'").replace("*⃣", "'*⃣'"); //hack for 8288
437         //        }
438 
439         try {
440             RBC = new RuleBasedCollator(rules);
441         } catch (Exception e) {
442             throw new IllegalArgumentException(
443                     "Failure in rules for " + CLDRPaths.COMMON_DIRECTORY + "collation/" + "root",
444                     e);
445         }
446     }
447 }
448