package org.unicode.cldr.tool;

import com.google.common.base.Joiner;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R3;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
import org.unicode.cldr.util.Annotations;
import org.unicode.cldr.util.Annotations.AnnotationSet;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CLDRURLS;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.FileCopier;
import org.unicode.cldr.util.LanguageGroup;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.LocaleIDParser;

/**
 * Writes the annotation charts: an index page plus one sub-chart per {@link LanguageGroup}, each
 * showing the annotations of the group's locales (with English as the first column) for every
 * charted character.
 */
public class ChartAnnotations extends Chart {

    private static final String LDML_ANNOTATIONS =
            "<a href='https://unicode.org/reports/tr35/tr35-general.html#Annotations'>LDML Annotations</a>";

    // NOTE(review): the "latest-release" and "beta" links below point at the same URL;
    // presumably one of them should differ -- confirm the intended targets before changing.
    private static final String MAIN_HEADER =
            "<p>Annotations provide names and keywords for Unicode characters, currently focusing on emoji. "
                    + "If you see any problems, please <a target='_blank' href='"
                    + CLDRURLS.CLDR_NEWTICKET_URL
                    + "'>file a ticket</a> with the corrected values for the locale. "
                    + "For the XML data used for these charts, see "
                    + "<a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>latest-release annotations </a> "
                    + "or <a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>beta annotations</a>. "
                    + "For more information, see "
                    + LDML_ANNOTATIONS
                    + ".</p>";

    /** When true, extra diagnostic output is printed to standard output while charting. */
    private static final boolean DEBUG = false;

    /** Output directory shared by the index chart and all sub-charts. */
    private static final String DIR = CLDRPaths.CHART_DIRECTORY + "annotations/";

    /**
     * Command-line entry point: writes the annotation charts.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new ChartAnnotations().writeChart(null);
    }

    @Override
    public String getDirectory() {
        return DIR;
    }

    @Override
    public String getTitle() {
        return "Annotation Charts";
    }

    @Override
    public String getFileName() {
        return "index";
    }

    @Override
    public String getExplanation() {
        // The trailing tag closes the paragraph (it was previously a second, unclosed <p>).
        return MAIN_HEADER
                + "<p>The charts are presented in groups of related languages, for easier comparison.</p>";
    }

    /**
     * Prepares the output directory, writes every sub-chart, and then writes the collected anchors
     * as the body of the index page.
     *
     * @param pw writer for the index page
     * @throws IOException if writing a chart fails
     */
    @Override
    public void writeContents(FormattedFileWriter pw) throws IOException {
        FileCopier.ensureDirectoryExists(DIR);
        FileCopier.copy(Chart.class, "index.css", DIR);
        FormattedFileWriter.copyIncludeHtmls(DIR);

        FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors();
        writeSubcharts(anchors);
        pw.setIndex("Main Chart Index", "../index.html");
        pw.write(anchors.toString());
    }

    /**
     * Extra characters and sequences that are charted in addition to the keys of the English
     * annotation set.
     *
     * <p>NOTE(review): several literals below are empty or look partial in this copy of the file;
     * presumably they held emoji (possibly with invisible joiners) that did not survive text
     * extraction. Verify every entry against the upstream source before relying on this list.
     */
    static final UnicodeSet EXTRAS =
            new UnicodeSet()
                    .addAll(
                            Arrays.asList(
                                    "",
                                    "",
                                    "#️⃣",
                                    "",
                                    "❤️",
                                    "❤️",
                                    "",
                                    "⚕️",
                                    "♂️",
                                    "♀️",
                                    "❤️",
                                    "♀️",
                                    "",
                                    "❤️",
                                    "",
                                    "❤️",
                                    "",
                                    "",
                                    "",
                                    "",
                                    "⚖",
                                    "⚖",
                                    "⚖",
                                    "⚖",
                                    "",
                                    "♂️",
                                    "♂️",
                                    "♀️",
                                    "♀️",
                                    "",
                                    "",
                                    "♂️",
                                    "♂️",
                                    "♀️",
                                    "♀️",
                                    "",
                                    "#️⃣",
                                    "",
                                    "⛹️♀️",
                                    "⚕️",
                                    "️",
                                    "☠️",
                                    "",
                                    "",
                                    "",
                                    ""))
                    .freeze();

    /**
     * Writes one sub-chart per language group. Each chart has a "Char" and a "Hex" column followed
     * by one column per locale in the group, with English always first. Locales that carry a
     * region are not given their own column; their values are appended to the cell of their
     * language(-script) locale when they differ from it.
     *
     * @param anchors collector passed to each sub-chart's {@code writeChart}
     * @throws IOException if writing a sub-chart fails
     */
    public void writeSubcharts(Anchors anchors) throws IOException {
        Set<String> locales = Annotations.getAvailableLocales();

        AnnotationSet english = Annotations.getDataSet("en");
        UnicodeSet s = new UnicodeSet(english.keySet()).addAll(EXTRAS).freeze();

        // set up right order for columns

        Map<String, String> nameToCode = new LinkedHashMap<>();
        Relation<LanguageGroup, R3<Integer, String, String>> groupToNameAndCodeSorted =
                Relation.of(
                        new EnumMap<LanguageGroup, Set<R3<Integer, String, String>>>(
                                LanguageGroup.class),
                        TreeSet.class);

        // language(-script) locale -> its regional sub-locales
        Multimap<String, String> localeToSub = TreeMultimap.create();
        LanguageTagParser ltp = new LanguageTagParser();

        for (String locale : locales) {
            ltp.set(locale);
            // root is never charted; "en" is added explicitly as the first column of every chart
            if (locale.equals("root") || locale.equals("en")) {
                continue;
            }
            String region = ltp.getRegion();
            if (!region.isEmpty()) {
                localeToSub.put(ltp.getLanguageScript(), locale);
                continue;
            }

            String name = ENGLISH.getName(locale, true);
            // group and rank are looked up by the part of the locale ID before the first '_'
            int baseEnd = locale.indexOf('_');
            ULocale loc = new ULocale(baseEnd < 0 ? locale : locale.substring(0, baseEnd));
            LanguageGroup group = LanguageGroup.get(loc);
            int rank = LanguageGroup.rankInGroup(loc);
            groupToNameAndCodeSorted.put(group, Row.of(rank, name, locale));
        }

        for (Entry<LanguageGroup, Set<R3<Integer, String, String>>> groupPairs :
                groupToNameAndCodeSorted.keyValuesSet()) {
            LanguageGroup group = groupPairs.getKey();
            String ename = ENGLISH.getName("en", true);
            nameToCode.clear();
            nameToCode.put(ename, "en"); // always have english first

            // add English variants if they exist

            for (R3<Integer, String, String> pair : groupPairs.getValue()) {
                String name = pair.get1();
                String locale = pair.get2();
                if (locale.startsWith("en_")) {
                    nameToCode.put(name, locale);
                }
            }

            // Re-putting an existing key keeps its position in the LinkedHashMap, so the English
            // variants added above stay ahead of the rest of the group.
            for (R3<Integer, String, String> pair : groupPairs.getValue()) {
                String name = pair.get1();
                String locale = pair.get2();

                nameToCode.put(name, locale);
                if (DEBUG) {
                    System.out.println(pair);
                }
            }
            // now build table with right order for columns
            // NOTE(review): the width is derived from the total number of available locales, not
            // from the number of columns in this table -- confirm whether that is intended.
            double width = ((int) ((99.0 / (locales.size() + 1)) * 1000)) / 1000.0;
            // String widthString = "class='source' width='"+ width + "%'";
            String widthStringTarget = "class='target' width='" + width + "%'";

            TablePrinter tablePrinter =
                    new TablePrinter()
                            .addColumn(
                                    "Char",
                                    "class='source' width='1%'",
                                    CldrUtility.getDoubleLinkMsg(),
                                    "class='source-image'",
                                    true)
                            .addColumn(
                                    "Hex",
                                    "class='source' width='1%'",
                                    null,
                                    "class='source'",
                                    true)
                    // .addColumn("Formal Name", "class='source' width='" + width + "%'", null,
                    // "class='source'", true)
                    ;

            for (Entry<String, String> entry : nameToCode.entrySet()) {
                String name = entry.getKey();
                tablePrinter.addColumn(name, widthStringTarget, null, "class='target'", true);
            }
            // sort the characters
            Set<String> sorted = new TreeSet<>(RBC);
            // rendered sub-locale value -> sub-locales sharing that value (reused per cell)
            Multimap<String, String> valueToSub = TreeMultimap.create();

            for (String cp : s.addAllTo(sorted)) {
                tablePrinter.addRow().addCell(cp).addCell(Utility.hex(cp, 4, " "))
                // .addCell(getName(cp))
                ;
                for (Entry<String, String> nameAndLocale : nameToCode.entrySet()) {
                    String name = nameAndLocale.getKey();
                    String locale = nameAndLocale.getValue();

                    AnnotationSet annotations = Annotations.getDataSet(locale);
                    AnnotationSet parentAnnotations =
                            Annotations.getDataSet(LocaleIDParser.getParent(locale));
                    String baseAnnotation = annotations.toString(cp, true, parentAnnotations);
                    String baseAnnotationOriginal = baseAnnotation;

                    if (DEBUG) {
                        System.out.println(name + ":" + annotations.toString(cp, false, null));
                    }
                    Collection<String> subs = localeToSub.get(locale);
                    if (!subs.isEmpty()) {
                        valueToSub.clear();
                        for (String sub : subs) {
                            AnnotationSet subAnnotations = Annotations.getDataSet(sub);
                            // Rendered against the parent of the column's locale (not of the
                            // sub-locale), the same parent used for baseAnnotationOriginal, which
                            // is what the equality check below compares against.
                            AnnotationSet subParentAnnotations =
                                    Annotations.getDataSet(LocaleIDParser.getParent(locale));
                            String baseAnnotation2 =
                                    subAnnotations.toString(cp, true, subParentAnnotations);
                            if (!baseAnnotation2.equals(baseAnnotationOriginal)) {
                                valueToSub.put(baseAnnotation2, sub);
                            }
                        }
                        // append each distinct sub-locale value, labelled with its locales
                        for (Entry<String, Collection<String>> entry :
                                valueToSub.asMap().entrySet()) {
                            baseAnnotation +=
                                    "<hr><i>"
                                            + Joiner.on(", ").join(entry.getValue())
                                            + "</i>: "
                                            + entry.getKey();
                        }
                    }
                    tablePrinter.addCell(baseAnnotation);
                }
                tablePrinter.finishRow();
            }
            final String name = group.toString();
            new Subchart(name + " Annotations", FileUtilities.anchorize(name), tablePrinter)
                    .writeChart(anchors);
        }
    }

    static final int FIRST_REGIONAL = 0x1F1E6;
    static final int LAST_REGIONAL = 0x1F1FF;

    /**
     * Maps a code point in the range {@code FIRST_REGIONAL..LAST_REGIONAL} to the corresponding
     * letter {@code 'A'..'Z'}.
     *
     * @param firstCodepoint the code point to test
     * @return the letter as an int, or -1 if the code point is outside the range
     */
    public static int getRegionalIndicator(int firstCodepoint) {
        return FIRST_REGIONAL <= firstCodepoint && firstCodepoint <= LAST_REGIONAL
                ? firstCodepoint - FIRST_REGIONAL + 'A'
                : -1;
    }

    //    private String getName(String cp) {
    //        int ri1 = getRegionalIndicator(cp.codePointAt(0));
    //        if (ri1 >= 0) {
    //            int ri2 = getRegionalIndicator(cp.codePointAt(2));
    //            return ENGLISH.getName(CLDRFile.TERRITORY_NAME, String.valueOf((char) ri1) +
    // String.valueOf((char) ri2));
    //        }
    //        String result = NAMES80.get(cp);
    //        return result != null ? result : UCharacter.getName(cp, ", ");
    //    }
    //
    //    private static UnicodeMap<String> NAMES80 = new UnicodeMap<>();
    //    static {
    //        String[][] data = {
    //            { "", "EMOJI MODIFIER FITZPATRICK TYPE-1-2" },
    //            { "", "EMOJI MODIFIER FITZPATRICK TYPE-3" },
    //            { "", "EMOJI MODIFIER FITZPATRICK TYPE-4" },
    //            { "", "EMOJI MODIFIER FITZPATRICK TYPE-5" },
    //            { "", "EMOJI MODIFIER FITZPATRICK TYPE-6" },
    //            { "", "ZIPPER-MOUTH FACE" },
    //            { "", "MONEY-MOUTH FACE" },
    //            { "", "FACE WITH THERMOMETER" },
    //            { "", "NERD FACE" },
    //            { "", "THINKING FACE" },
    //            { "", "FACE WITH ROLLING EYES" },
    //            { "", "UPSIDE-DOWN FACE" },
    //            { "", "FACE WITH HEAD-BANDAGE" },
    //            { "", "ROBOT FACE" },
    //            { "", "HUGGING FACE" },
    //            { "", "SIGN OF THE HORNS" },
    //            { "", "CRAB (also Cancer)" },
    //            { "", "SCORPION (also Scorpio)" },
    //            { "", "LION FACE (also Leo)" },
    //            { "", "BOW AND ARROW (also Sagittarius)" },
    //            { "", "AMPHORA (also Aquarius)" },
    //            { "", "PLACE OF WORSHIP" },
    //            { "", "KAABA" },
    //            { "", "MOSQUE" },
    //            { "", "SYNAGOGUE" },
    //            { "", "MENORAH WITH NINE BRANCHES" },
    //            { "", "PRAYER BEADS" },
    //            { "", "HOT DOG" },
    //            { "", "TACO" },
    //            { "", "BURRITO" },
    //            { "", "CHEESE WEDGE" },
    //            { "", "POPCORN" },
    //            { "", "BOTTLE WITH POPPING CORK" },
    //            { "", "TURKEY" },
    //            { "", "UNICORN FACE" },
    //            { "", "CRICKET BAT AND BALL" },
    //            { "", "VOLLEYBALL" },
    //            { "", "FIELD HOCKEY STICK AND BALL" },
    //            { "", "ICE HOCKEY STICK AND PUCK" },
    //            { "", "TABLE TENNIS PADDLE AND BALL" },
    //            { "", "BADMINTON RACQUET AND SHUTTLECOCK" } };
    //        for (String[] pair : data) {
    //            NAMES80.put(pair[0], pair[1]);
    //        }
    //        NAMES80.freeze();
    //    }

    /**
     * Chart for a single language group; its body is the table built by {@link
     * ChartAnnotations#writeSubcharts}. Declared static because it only uses static members of the
     * enclosing class.
     */
    private static class Subchart extends Chart {
        private final String title;
        private final String file;
        private final TablePrinter tablePrinter;

        @Override
        public boolean getShowDate() {
            return false;
        }

        /**
         * @param title page title
         * @param file file name returned by {@link #getFileName()}
         * @param tablePrinter fully populated table that becomes the page body
         */
        public Subchart(String title, String file, TablePrinter tablePrinter) {
            super();
            this.title = title;
            this.file = file;
            this.tablePrinter = tablePrinter;
        }

        @Override
        public String getDirectory() {
            return DIR;
        }

        @Override
        public String getTitle() {
            return title;
        }

        @Override
        public String getFileName() {
            return file;
        }

        @Override
        public String getExplanation() {
            // The first paragraph is closed with </p> (it previously ended in an unclosed <p>).
            return MAIN_HEADER
                    + "<p>This table shows the annotations for a group of related languages (plus English) for easier comparison. "
                    + "The first item is the <b>short name</b> (also the text-to-speech phrase). "
                    + "It is bolded for clarity, and marked with a * for searching on this page. "
                    + "The remaining phrases are <b>keywords</b> (labels), separated by “|”. "
                    + "The keywords plus the words in the short name are typically used for search and predictive typing.</p>\n"
                    + "<p>Most short names and keywords that can be constructed with the mechanism in "
                    + LDML_ANNOTATIONS
                    + " are omitted. "
                    + "However, a few are included for comparison: "
                    + Joiner.on(", ").join(EXTRAS.addAllTo(new TreeSet<>()))
                    + ". "
                    + "In this chart, missing items are marked with “"
                    + Annotations.MISSING_MARKER
                    + "”, "
                    + "‘fallback’ constructed items with “"
                    + Annotations.BAD_MARKER
                    + "”, "
                    + "substituted English values with “"
                    + Annotations.ENGLISH_MARKER
                    + "”, and "
                    + "values equal to their parent locale’s values are replaced with "
                    + Annotations.EQUIVALENT
                    + ".</p>\n";
        }

        @Override
        public void writeContents(FormattedFileWriter pw) throws IOException {
            pw.write(tablePrinter.toTable());
        }
    }

    /**
     * Collator built from the root "emoji" collation rules; used to order the charted characters.
     * Assigned once by the static initializer below.
     */
    public static RuleBasedCollator RBC;

    static {
        Factory cldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "collation/", ".*");
        CLDRFile root = cldrFactory.make("root", false);
        String rules =
                root.getStringValue(
                        "//ldml/collations/collation[@type=\"emoji\"][@visibility=\"external\"]/cr");

        //        if (!rules.contains("'#⃣'")) {
        //            rules = rules.replace("#⃣", "'#⃣'").replace("*⃣", "'*⃣'"); //hack for 8288
        //        }

        try {
            RBC = new RuleBasedCollator(rules);
        } catch (Exception e) {
            // Surface rule failures with the location of the offending collation data.
            throw new IllegalArgumentException(
                    "Failure in rules for " + CLDRPaths.COMMON_DIRECTORY + "collation/" + "root",
                    e);
        }
    }
}