1 package org.unicode.cldr.tool; 2 3 import java.io.IOException; 4 import java.io.PrintWriter; 5 import java.util.BitSet; 6 import java.util.Collection; 7 import java.util.HashMap; 8 import java.util.HashSet; 9 import java.util.LinkedHashSet; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeMap; 15 import java.util.TreeSet; 16 import java.util.regex.Matcher; 17 18 import org.unicode.cldr.draft.FileUtilities; 19 import org.unicode.cldr.draft.Keyboard; 20 import org.unicode.cldr.draft.Keyboard.Gesture; 21 import org.unicode.cldr.draft.Keyboard.Iso; 22 import org.unicode.cldr.draft.Keyboard.KeyMap; 23 import org.unicode.cldr.draft.Keyboard.Output; 24 import org.unicode.cldr.draft.Keyboard.TransformStatus; 25 import org.unicode.cldr.draft.Keyboard.TransformType; 26 import org.unicode.cldr.draft.Keyboard.Transforms; 27 import org.unicode.cldr.draft.KeyboardModifierSet; 28 import org.unicode.cldr.tool.Option.Options; 29 import org.unicode.cldr.util.CLDRConfig; 30 import org.unicode.cldr.util.CLDRFile; 31 import org.unicode.cldr.util.CLDRFile.WinningChoice; 32 import org.unicode.cldr.util.CLDRPaths; 33 import org.unicode.cldr.util.CLDRTool; 34 import org.unicode.cldr.util.CldrUtility; 35 import org.unicode.cldr.util.Counter; 36 import org.unicode.cldr.util.Factory; 37 import org.unicode.cldr.util.FileCopier; 38 import org.unicode.cldr.util.LanguageTagCanonicalizer; 39 import org.unicode.cldr.util.Log; 40 import org.unicode.cldr.util.PatternCache; 41 import org.unicode.cldr.util.SupplementalDataInfo; 42 import org.unicode.cldr.util.TransliteratorUtilities; 43 import org.unicode.cldr.util.UnicodeSetPrettyPrinter; 44 45 import com.google.common.base.Joiner; 46 import com.ibm.icu.impl.Relation; 47 import com.ibm.icu.impl.Row; 48 import com.ibm.icu.impl.Row.R2; 49 import com.ibm.icu.impl.Row.R3; 50 import com.ibm.icu.impl.Utility; 51 import com.ibm.icu.lang.UCharacter; 52 import com.ibm.icu.lang.UProperty; 53 import com.ibm.icu.lang.UScript; 54 import com.ibm.icu.text.Collator; 55 import com.ibm.icu.text.Transliterator; 56 import com.ibm.icu.text.UnicodeSet; 57 import com.ibm.icu.util.ICUUncheckedIOException; 58 import com.ibm.icu.util.ULocale; 59 60 @CLDRTool(alias = "showkeyboards", description = "Generate keyboard charts") 61 public class ShowKeyboards { 62 // TODO - fix ' > xxx 63 // TODO - check for bad locale ids 64 65 private static final String ABOUT_KEYBOARD_CHARTS = "<p>For more information, see <a target='ABOUT_KB' href='http://cldr.unicode.org/index/charts/keyboards'>About Keyboard Charts</a>.</p>"; 66 private static String keyboardChartDir; 67 private static String keyboardChartLayoutsDir; 68 static final CLDRConfig testInfo = ToolConfig.getToolInstance(); 69 static final Factory factory = testInfo.getCldrFactory(); 70 71 static final boolean SHOW_BACKGROUND = false; 72 73 final static Options myOptions = new Options(); 74 75 enum MyOptions { 76 idFilter(".+", ".*", "Filter the information based on id, using a regex argument."), sourceDirectory(".+", CLDRPaths.BASE_DIRECTORY + "keyboards/", 77 "The source directory. CURRENTLY CAN’T BE CHANGED!!"), targetDirectory(".+", CLDRPaths.CHART_DIRECTORY + "keyboards/", 78 "The target directory."), layouts(null, null, 79 "Only create html files for keyboard layouts"), repertoire(null, null, "Only create html files for repertoire"),; 80 // boilerplate 81 final Option option; 82 MyOptions(String argumentPattern, String defaultArgument, String helpText)83 MyOptions(String argumentPattern, String defaultArgument, String helpText) { 84 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 85 } 86 } 87 88 static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance(); 89 90 // ********************************************* 91 // Temporary, for some simple testing 92 // ********************************************* main(String[] args)93 public static void main(String[] args) throws IOException { 94 myOptions.parse(MyOptions.idFilter, args, true); 95 String idPattern = MyOptions.idFilter.option.getValue(); 96 keyboardChartDir = MyOptions.targetDirectory.option.getValue(); 97 keyboardChartLayoutsDir = keyboardChartDir + "/layouts/"; 98 99 FileCopier.ensureDirectoryExists(keyboardChartDir); 100 FileCopier.copy(ShowKeyboards.class, "keyboards-index.html", keyboardChartDir, "index.html"); 101 102 Matcher idMatcher = PatternCache.get(idPattern).matcher(""); 103 try { 104 Log.setLog(CLDRPaths.LOG_DIRECTORY + "keyboard-log.txt"); 105 } catch (IOException e) { 106 throw new ICUUncheckedIOException(e); 107 } 108 boolean layoutsOnly = MyOptions.layouts.option.doesOccur(); 109 boolean repertoireOnly = MyOptions.repertoire.option.doesOccur(); 110 111 if (!repertoireOnly) { 112 showHtml(idMatcher); 113 } 114 if (!layoutsOnly) { 115 showRepertoire(idMatcher); 116 } 117 } 118 showRepertoire(Matcher idMatcher)119 public static void showRepertoire(Matcher idMatcher) { 120 Set<Exception> totalErrors = new LinkedHashSet<>(); 121 Set<Exception> errors = new LinkedHashSet<>(); 122 UnicodeSet controls = new UnicodeSet("[:Cc:]").freeze(); 123 // check what the characters are, excluding controls. 124 Map<Id, UnicodeSet> id2unicodeset = new TreeMap<>(); 125 Set<String> totalModifiers = new LinkedHashSet<>(); 126 Relation<String, Id> locale2ids = Relation.of(new TreeMap<String, Set<Id>>(), TreeSet.class); 127 LanguageTagCanonicalizer canonicalizer = new LanguageTagCanonicalizer(); 128 IdInfo idInfo = new IdInfo(); 129 for (String platformId : Keyboard.getPlatformIDs()) { 130 //Platform p = Keyboard.getPlatform(platformId); 131 // System.out.println(platformId + "\t" + p.getHardwareMap()); 132 for (String keyboardId : Keyboard.getKeyboardIDs(platformId)) { 133 if (!idMatcher.reset(keyboardId).matches()) { 134 continue; 135 } 136 Keyboard keyboard = Keyboard.getKeyboard(platformId, keyboardId, errors); 137 for (Exception error : errors) { 138 totalErrors.add(new IllegalArgumentException(keyboardId, error)); 139 } 140 UnicodeSet unicodeSet = keyboard.getPossibleResults().removeAll(controls); 141 final Id id = new Id(keyboardId, keyboard.getPlatformVersion()); 142 idInfo.add(id, unicodeSet); 143 String canonicalLocale = canonicalizer.transform(id.locale).replace('_', '-'); 144 if (!id.locale.equals(canonicalLocale)) { 145 totalErrors.add(new IllegalArgumentException("Non-canonical id: " + id.locale + "\t=>\t" + canonicalLocale)); 146 } 147 id2unicodeset.put(id, unicodeSet.freeze()); 148 locale2ids.put(id.locale, id); 149 System.out.println(id.toString().replace('/', '\t') + "\t" + keyboard.getNames()); 150 for (KeyMap keymap : keyboard.getKeyMaps()) { 151 totalModifiers.add(keymap.getModifiers().toString()); 152 } 153 } 154 } 155 if (totalErrors.size() != 0) { 156 System.out.println("Errors\t" + Joiner.on(System.lineSeparator() + "\t") 157 .join(totalErrors)); 158 } 159 for (String item : totalModifiers) { 160 System.out.println(item); 161 } 162 // logInfo.put(Row.of("k-cldr",common), keyboardId); 163 try { 164 FileCopier.copy(ShowKeyboards.class, "keyboards.css", keyboardChartDir, "index.css"); 165 FormattedFileWriter.copyIncludeHtmls(keyboardChartDir); 166 167 PrintWriter out = FileUtilities.openUTF8Writer(keyboardChartDir, "chars2keyboards.html"); 168 String[] headerAndFooter = new String[2]; 169 170 ShowData.getChartTemplate( 171 "Characters → Keyboards", 172 ToolConstants.CHART_DISPLAY_VERSION, 173 "", 174 headerAndFooter, null, false); 175 out.println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS); 176 177 // printTop("Characters → Keyboards", out); 178 idInfo.print(out); 179 // printBottom(out); 180 out.println(headerAndFooter[1]); 181 out.close(); 182 183 out = FileUtilities.openUTF8Writer(keyboardChartDir, "keyboards2chars.html"); 184 ShowData.getChartTemplate( 185 "Keyboards → Characters", 186 ToolConstants.CHART_DISPLAY_VERSION, 187 "", 188 headerAndFooter, null, false); 189 out.println(headerAndFooter[0] 190 + ABOUT_KEYBOARD_CHARTS); 191 // printTop("Keyboards → Characters", out); 192 showLocaleToCharacters(out, id2unicodeset, locale2ids); 193 // printBottom(out); 194 out.println(headerAndFooter[1]); 195 out.close(); 196 } catch (IOException e1) { 197 e1.printStackTrace(); 198 } 199 for (Entry<R2<String, UnicodeSet>, Set<Id>> entry : logInfo.keyValuesSet()) { 200 IdSet idSet = new IdSet(); 201 idSet.addAll(entry.getValue()); 202 Log.logln(entry.getKey().get0() + "\t" + entry.getKey().get1().toPattern(false) + "\t" 203 + idSet.toString(idInfo.allIds)); 204 } 205 Log.close(); 206 } 207 showHtml(Matcher idMatcher)208 private static void showHtml(Matcher idMatcher) throws IOException { 209 Set<Exception> errors = new LinkedHashSet<>(); 210 Relation<String, Row.R3<String, String, String>> locale2keyboards = Relation.of( 211 new TreeMap<String, Set<Row.R3<String, String, String>>>(), TreeSet.class); 212 Map<String, String> localeIndex = new TreeMap<>(); 213 214 for (String platformId : Keyboard.getPlatformIDs()) { 215 //Platform p = Keyboard.getPlatform(platformId); 216 // System.out.println(platformId + "\t" + p.getHardwareMap()); 217 for (String keyboardId : Keyboard.getKeyboardIDs(platformId)) { 218 if (!idMatcher.reset(keyboardId).matches()) { 219 continue; 220 } 221 String baseLocale = keyboardId.substring(0, keyboardId.indexOf('-')); 222 String locale = keyboardId.substring(0, keyboardId.indexOf("-t-")); 223 locale2keyboards.put(baseLocale, Row.of(platformId, locale, keyboardId)); 224 225 final String localeName = testInfo.getEnglish().getName(baseLocale, true); 226 localeIndex.put(localeName, baseLocale); 227 } 228 } 229 230 FileCopier.ensureDirectoryExists(keyboardChartLayoutsDir); 231 FileCopier.copy(ShowKeyboards.class, "keyboards.css", keyboardChartLayoutsDir, "index.css"); 232 FormattedFileWriter.copyIncludeHtmls(keyboardChartLayoutsDir); 233 PrintWriter index = FileUtilities.openUTF8Writer(keyboardChartLayoutsDir, "index.html"); 234 String[] headerAndFooter = new String[2]; 235 ShowData.getChartTemplate( 236 "Keyboard Layout Index", 237 ToolConstants.CHART_DISPLAY_VERSION, 238 "", 239 headerAndFooter, "Keyboard Index", false); 240 index 241 .println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS); 242 // printTop("Keyboard Layout Index", index); 243 index.println("<ol>"); 244 for (Entry<String, String> entry : localeIndex.entrySet()) { 245 index.println("<li><a href='" + entry.getValue() + ".html'>" 246 + entry.getKey() + "</a>" 247 + " [" + entry.getValue() + "]" + 248 "</li>"); 249 } 250 index.println("</ol>"); 251 index.println(headerAndFooter[1]); 252 // printBottom(index); 253 index.close(); 254 // FileUtilities.copyFile(ShowKeyboards.class, "keyboards.css", keyboardChartLayoutsDir); 255 256 for (Entry<String, Set<R3<String, String, String>>> localeKeyboards : locale2keyboards.keyValuesSet()) { 257 String locale = localeKeyboards.getKey(); 258 final String localeName = testInfo.getEnglish().getName(locale); 259 260 // String localeNameString = localeName.replace(' ', '_').toLowerCase(Locale.ENGLISH); 261 PrintWriter out = FileUtilities.openUTF8Writer(keyboardChartLayoutsDir, locale + ".html"); 262 ShowData.getChartTemplate( 263 "Layouts: " + localeName + " (" + locale + ")", 264 ToolConstants.CHART_DISPLAY_VERSION, 265 "", 266 headerAndFooter, null, false); 267 out.println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS); 268 // printTop("Layouts: " + localeName + " (" + locale + ")", out); 269 Set<R3<String, String, String>> keyboards = localeKeyboards.getValue(); 270 for (R3<String, String, String> platformKeyboard : keyboards) { 271 String platformId = platformKeyboard.get0(); 272 String keyboardId = platformKeyboard.get2(); 273 // System.out.println(platformId + "\t" + p.getHardwareMap()); 274 Keyboard keyboard = Keyboard.getKeyboard(platformId, keyboardId, errors); 275 showErrors(errors); 276 Set<String> names = keyboard.getNames(); 277 String platformFromKeyboardId = Keyboard.getPlatformId(keyboardId); 278 String printId = platformId.equals(platformFromKeyboardId) ? keyboardId : keyboardId + "/und"; 279 out.println("<h2>" + CldrUtility.getDoubleLinkedText(printId, printId) 280 + (names.size() == 0 ? "" : " " + names) 281 + "</h2>"); 282 283 Transforms transforms = keyboard.getTransforms().get(TransformType.SIMPLE); 284 285 out.println("<table class='keyboards'><tr>"); 286 for (KeyMap map : keyboard.getKeyMaps()) { 287 KeyboardModifierSet mods = map.getModifiers(); 288 out.println("<td class='keyboardTD'><table class='keyboard'>"); 289 // KeyboardModifierSet modifiers = map.getModifiers(); 290 Map<Iso, Output> isoMap = map.getIso2Output(); 291 for (Keyboard.IsoRow row : Keyboard.IsoRow.values()) { 292 out.println("<tr>"); 293 for (Iso isoValue : Iso.values()) { 294 if (isoValue.isoRow != row) { 295 continue; 296 } 297 Output output = isoMap.get(isoValue); 298 if (output == null) { 299 out.println("<td class='x'> </td>"); 300 continue; 301 } 302 String chars = output.getOutput(); 303 TransformStatus transformStatus = output.getTransformStatus(); 304 StringBuilder hover = new StringBuilder(); 305 if (transformStatus == TransformStatus.DEFAULT && transforms != null) { 306 Map<String, String> map2 = transforms.getMatch(chars); 307 add(map2, hover); 308 } 309 Map<Gesture, List<String>> gestures = output.getGestures(); 310 if (!gestures.isEmpty()) { 311 add(gestures, hover); 312 } 313 final String longPress = hover.length() == 0 ? "" 314 : " title='" + hover + "'"; 315 out.println("<td class='" + (hover.length() == 0 ? 'm' : 'h') + 316 "'" + longPress + ">" 317 + toSafeHtml(chars) + "</td>"); 318 } 319 out.println("</tr>"); 320 } 321 String modsString = mods.getShortInput(); 322 if (modsString.isEmpty()) { 323 modsString = "\u00A0"; 324 } else if (modsString.length() > 20) { 325 modsString = modsString.substring(0, 20) + "…"; 326 } 327 out.println("</table><span class='modifiers'>" 328 + TransliteratorUtilities.toHTML.transform(modsString) + 329 "</span></td>"); 330 } 331 out.println("</tr></table>"); 332 } 333 index.println(headerAndFooter[1]); 334 // printBottom(out); 335 out.close(); 336 } 337 System.out.println("Failing Invisibles: " + FAILING_INVISIBLE.retainAll(INVISIBLE)); 338 } 339 showErrors(Set<Exception> errors)340 private static void showErrors(Set<Exception> errors) { 341 for (Exception error : errors) { 342 String title = error.getMessage().contains("No minimal data for") ? "Warning" : "Error"; 343 System.out.println("\t*" + title + ":\t" + error); 344 } 345 } 346 347 static Transliterator TO_SAFE_HTML; 348 static { 349 StringBuilder rules = new StringBuilder(TransliteratorUtilities.toHTML.toRules(false)); 350 for (char i = 0; i < 0x20; ++i) { String.valueOf(i)351 addRule(String.valueOf(i), "^" + String.valueOf((char) (i + 0x40)), rules); 352 } 353 String[][] map = { 354 // {"\u0020","sp"}, 355 { "\u007F", "del" }, 356 { "\u00A0", "nbsp" }, 357 { "\u00AD", "shy" }, 358 { "\u200B", "zwsp" }, 359 { "\u200C", "zwnj" }, 360 { "\u200D", "zwj" }, 361 { "\u200E", "lrm" }, 362 { "\u200F", "rlm" }, 363 { "\u202F", "nnbs" }, 364 { "\uFEFF", "bom" }, 365 { "\u180B", "mvs1" }, 366 { "\u180C", "mvs2" }, 367 { "\u180D", "mvs3" }, 368 { "\u180E", "mvs" }, 369 // {"\uF8FF","appl"}, 370 }; 371 for (String[] items : map) { 372 final String fromItem = items[0]; 373 final String toItem = items[1]; addRule(fromItem, toItem, rules)374 addRule(fromItem, toItem, rules); 375 } 376 TO_SAFE_HTML = Transliterator.createFromRules("none", rules.toString(), Transliterator.FORWARD); 377 } 378 addRule(final String fromItem, final String toItem, StringBuilder rules)379 public static void addRule(final String fromItem, final String toItem, StringBuilder rules) { 380 rules.append("'" 381 + fromItem 382 + "'>" 383 + "'<span class=\"cc\">" 384 + toItem 385 + "</span>'" 386 + ";" 387 + System.lineSeparator()); 388 } 389 390 static UnicodeSet INVISIBLE = new UnicodeSet("[[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]-[\\u0020]]").freeze(); 391 static UnicodeSet FAILING_INVISIBLE = new UnicodeSet(); 392 toSafeHtml(Object hover)393 public static String toSafeHtml(Object hover) { 394 String result = TO_SAFE_HTML.transform(hover.toString()); 395 if (INVISIBLE.containsSome(result)) { 396 FAILING_INVISIBLE.addAll(result); 397 } 398 return result; 399 } 400 add(Map<K, V> map2, StringBuilder hover)401 private static <K, V> void add(Map<K, V> map2, StringBuilder hover) { 402 if (!map2.isEmpty()) { 403 for (Entry<K, V> entry : map2.entrySet()) { 404 if (hover.length() != 0) { 405 hover.append("; "); 406 } 407 final K key = entry.getKey(); 408 String keyString = key == Gesture.LONGPRESS ? "LP" : key.toString(); 409 final V value = entry.getValue(); 410 String valueString = value instanceof Collection 411 ? Joiner.on(" ").join((Collection) value) 412 : value.toString(); 413 hover.append(TransliteratorUtilities.toHTML.transform(keyString)).append("→") 414 .append(TransliteratorUtilities.toHTML.transform(valueString)); 415 } 416 } 417 } 418 419 // public static void printTop(String title, PrintWriter out) { 420 // out.println( 421 // "<html>\n" + 422 // "<head>\n" + 423 // "<meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/>\n" + 424 // "<link rel='stylesheet' type='text/css' href='keyboards.css'>" + 425 // "<title>" + title + "</title>\n" + 426 // "</head>\n" + 427 // "<body>\n" + 428 // "<h1>DRAFT " + 429 // title + 430 // "</h1>\n" + 431 // "<p>For more information, see <a href='http://cldr.unicode.org/index/charts/keyboards'>Keyboard Charts</a>.</p>" 432 // ); 433 // } 434 // 435 // public static void printBottom(PrintWriter pw) { 436 // pw.println( 437 // "</body>\n" + 438 // "</html>" 439 // ); 440 // } 441 showLocaleToCharacters(PrintWriter out, Map<Id, UnicodeSet> id2unicodeset, Relation<String, Id> locale2ids)442 public static void showLocaleToCharacters(PrintWriter out, Map<Id, UnicodeSet> id2unicodeset, 443 Relation<String, Id> locale2ids) { 444 445 TablePrinter t = new TablePrinter() 446 .addColumn("Name").setSpanRows(true).setBreakSpans(true).setSortPriority(0) 447 .setCellAttributes("class='cell'") 448 .addColumn("Locale").setSpanRows(true).setBreakSpans(true).setCellAttributes("class='cell'") 449 .addColumn("Platform").setSpanRows(true).setCellAttributes("class='cell'") 450 .addColumn("Variant").setCellAttributes("class='cell'") 451 .addColumn("Script").setCellAttributes("class='cell'") 452 .addColumn("Statistics").setCellAttributes("class='cell'") 453 .addColumn("Characters").setSpanRows(true).setCellAttributes("class='cell'"); 454 455 Map<String, UnicodeSet> commonSets = new HashMap<>(); 456 Counter<String> commonCount = new Counter<>(); 457 Set<String> commonDone = new HashSet<>(); 458 459 for (Entry<String, Set<Id>> localeAndIds : locale2ids.keyValuesSet()) { 460 final String key = localeAndIds.getKey(); 461 final Set<Id> keyboardIds = localeAndIds.getValue(); 462 463 // System.out.println(); 464 final String localeName = testInfo.getEnglish().getName(key, true); 465 final String linkedLocaleName = CldrUtility.getDoubleLinkedText(key, localeName); 466 final ULocale uLocale = ULocale.forLanguageTag(key); 467 String script = uLocale.getScript(); 468 String writtenLanguage = uLocale.getLanguage() + (script.isEmpty() ? "" : "_" + script); 469 CLDRFile cldrFile = null; 470 try { 471 cldrFile = factory.make(writtenLanguage, true); 472 } catch (Exception e) { 473 } 474 475 // final String heading = uLocale.getDisplayName(ULocale.ENGLISH) 476 // + "\t" + ULocale.addLikelySubtags(uLocale).getScript() 477 // + "\t"; 478 UnicodeSet common = UnicodeSet.EMPTY; 479 final String likelyScript = ULocale.addLikelySubtags(uLocale).getScript(); 480 commonCount.clear(); 481 for (String platform : Keyboard.getPlatformIDs()) { 482 commonSets.put(platform, UnicodeSet.EMPTY); 483 } 484 if (keyboardIds.size() > 1) { 485 common = UnicodeSet.EMPTY; 486 for (Id keyboardId : keyboardIds) { 487 final UnicodeSet keyboardSet = id2unicodeset.get(keyboardId); 488 if (common == UnicodeSet.EMPTY) { 489 common = new UnicodeSet(keyboardSet); 490 } else { 491 common.retainAll(keyboardSet); 492 } 493 UnicodeSet platformCommon = commonSets.get(keyboardId.platform); 494 commonCount.add(keyboardId.platform, 1); 495 if (platformCommon == UnicodeSet.EMPTY) { 496 commonSets.put(keyboardId.platform, new UnicodeSet(keyboardSet)); 497 } else { 498 platformCommon.retainAll(keyboardSet); 499 } 500 } 501 common.freeze(); 502 t.addRow() 503 .addCell(linkedLocaleName) // name 504 .addCell(key) // locale 505 .addCell("ALL") // platform 506 .addCell("COMMON") // variant 507 .addCell(likelyScript) // script 508 .addCell(getInfo(null, common, cldrFile)) // stats 509 .addCell(safeUnicodeSet(common)) // characters 510 .finishRow(); 511 512 // System.out.println( 513 // locale + "\tCOMMON\t\t-" 514 // + "\t" + heading + getInfo(common, cldrFile) 515 // + "\t" + common.toPattern(false)); 516 } 517 commonDone.clear(); 518 for (Id keyboardId : keyboardIds) { 519 UnicodeSet platformCommon = commonSets.get(keyboardId.platform); 520 if (!commonDone.contains(keyboardId.platform)) { 521 commonDone.add(keyboardId.platform); 522 if (commonCount.get(keyboardId.platform) <= 1) { 523 platformCommon = UnicodeSet.EMPTY; 524 commonSets.put(keyboardId.platform, platformCommon); 525 } else if (platformCommon.size() > 0) { 526 // get stats for all, but otherwise remove common. 527 final String stats = getInfo(null, platformCommon, cldrFile); 528 platformCommon.removeAll(common).freeze(); 529 commonSets.put(keyboardId.platform, platformCommon); 530 t.addRow() 531 .addCell(linkedLocaleName) // name 532 .addCell(key) // locale 533 .addCell(keyboardId.platform) // platform 534 .addCell("COMMON") // variant 535 .addCell(likelyScript) // script 536 .addCell(stats) // stats 537 .addCell(safeUnicodeSet(platformCommon)) // characters 538 .finishRow(); 539 } 540 } 541 final UnicodeSet current2 = id2unicodeset.get(keyboardId); 542 final UnicodeSet remainder = new UnicodeSet(current2) 543 .removeAll(common) 544 .removeAll(platformCommon); 545 546 t.addRow() 547 .addCell(linkedLocaleName) // name 548 .addCell(key) // locale 549 .addCell(keyboardId.platform) // platform 550 .addCell(keyboardId.variant) // variant 551 .addCell(likelyScript) // script 552 .addCell(getInfo(keyboardId, current2, cldrFile)) // stats 553 .addCell(safeUnicodeSet(remainder)) // characters 554 .finishRow(); 555 // System.out.println( 556 // keyboardId.toString().replace('/','\t') 557 // + "\t" + keyboardId.platformVersion 558 // + "\t" + heading + getInfo(current2, cldrFile) 559 // + "\t" + remainder.toPattern(false)); 560 } 561 } 562 out.println(t.toTable()); 563 } 564 565 static UnicodeSetPrettyPrinter prettyPrinter = new UnicodeSetPrettyPrinter() 566 .setOrdering(Collator.getInstance(ULocale.ROOT)) 567 .setSpaceComparator(Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY)); 568 safeUnicodeSet(UnicodeSet unicodeSet)569 public static String safeUnicodeSet(UnicodeSet unicodeSet) { 570 return TransliteratorUtilities.toHTML.transform(prettyPrinter.format(unicodeSet)); 571 } 572 573 static class IdInfo { 574 final Collator collator = Collator.getInstance(ULocale.ENGLISH); 575 BitSet bitset = new BitSet(); 576 BitSet bitset2 = new BitSet(); 577 @SuppressWarnings("unchecked") 578 TreeMap<String, IdSet>[] charToKeyboards = new TreeMap[UScript.CODE_LIMIT]; 579 { 580 collator.setStrength(Collator.IDENTICAL); 581 for (int i = 0; i < charToKeyboards.length; ++i) { 582 charToKeyboards[i] = new TreeMap<>(collator); 583 } 584 } 585 IdSet allIds = new IdSet(); 586 add(Id id, UnicodeSet unicodeSet)587 public void add(Id id, UnicodeSet unicodeSet) { 588 allIds.add(id); 589 for (String s : unicodeSet) { 590 int script = getScriptExtensions(s, bitset); 591 if (script >= 0) { 592 addToScript(script, id, s); 593 } else { 594 for (int script2 = bitset.nextSetBit(0); script2 >= 0; script2 = bitset.nextSetBit(script2 + 1)) { 595 addToScript(script2, id, s); 596 } 597 } 598 } 599 } 600 getScriptExtensions(String s, BitSet outputBitset)601 public int getScriptExtensions(String s, BitSet outputBitset) { 602 final int firstCodePoint = s.codePointAt(0); 603 int result = UScript.getScriptExtensions(firstCodePoint, outputBitset); 604 final int firstCodePointCount = Character.charCount(firstCodePoint); 605 if (s.length() == firstCodePointCount) { 606 return result; 607 } 608 for (int i = firstCodePointCount; i < s.length();) { 609 int ch = s.codePointAt(i); 610 UScript.getScriptExtensions(ch, bitset2); 611 outputBitset.or(bitset2); 612 i += Character.charCount(ch); 613 } 614 // remove inherited, if there is anything else; then remove common if there is anything else 615 int cardinality = outputBitset.cardinality(); 616 if (cardinality > 1) { 617 if (outputBitset.get(UScript.INHERITED)) { 618 outputBitset.clear(UScript.INHERITED); 619 --cardinality; 620 } 621 if (cardinality > 1) { 622 if (outputBitset.get(UScript.COMMON)) { 623 outputBitset.clear(UScript.COMMON); 624 --cardinality; 625 } 626 } 627 } 628 if (cardinality == 1) { 629 return outputBitset.nextSetBit(0); 630 } else { 631 return -cardinality; 632 } 633 } 634 addToScript(int script, Id id, String s)635 public void addToScript(int script, Id id, String s) { 636 TreeMap<String, IdSet> charToKeyboard = charToKeyboards[script]; 637 IdSet idSet = charToKeyboard.get(s); 638 if (idSet == null) { 639 charToKeyboard.put(s, idSet = new IdSet()); 640 } 641 idSet.add(id); 642 } 643 print(PrintWriter pw)644 public void print(PrintWriter pw) { 645 646 TablePrinter t = new TablePrinter() 647 .addColumn("Script").setSpanRows(true).setCellAttributes("class='s'") 648 .addColumn("Char").setCellAttributes("class='ch'") 649 .addColumn("Code").setCellAttributes("class='c'") 650 .addColumn("Name").setCellAttributes("class='n'") 651 .addColumn("Keyboards").setSpanRows(true).setCellAttributes("class='k'"); 652 Set<String> missingScripts = new TreeSet<>(); 653 UnicodeSet notNFKC = new UnicodeSet("[:nfkcqc=n:]"); 654 UnicodeSet COMMONINHERITED = new UnicodeSet("[[:sc=common:][:sc=inherited:]]"); 655 656 for (int script = 0; script < charToKeyboards.length; ++script) { 657 UnicodeSet inScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script).removeAll( 658 notNFKC); 659 660 // UnicodeSet fullScript = new UnicodeSet(inScript); 661 // int fullScriptSize = inScript.size(); 662 if (inScript.size() == 0) { 663 continue; 664 } 665 final TreeMap<String, IdSet> charToKeyboard = charToKeyboards[script]; 666 final String scriptName = UScript.getName(script); 667 final String linkedScriptName = CldrUtility.getDoubleLinkedText(UScript.getShortName(script), 668 scriptName); 669 if (charToKeyboard.size() == 0) { 670 missingScripts.add(scriptName); 671 continue; 672 } 673 674 // also check to see that at least one item is not all common 675 check: if (script != UScript.COMMON && script != UScript.INHERITED) { 676 for (String s : charToKeyboard.keySet()) { 677 if (!COMMONINHERITED.containsAll(s)) { 678 break check; 679 } 680 } 681 missingScripts.add(scriptName); 682 continue; 683 } 684 685 String last = ""; 686 for (Entry<String, IdSet> entry : charToKeyboard.entrySet()) { 687 String s = entry.getKey(); 688 IdSet value = entry.getValue(); 689 final String keyboardsString = value.toString(allIds); 690 if (!s.equalsIgnoreCase(last)) { 691 if (s.equals("\u094D\u200C")) { // Hack, because the browsers width is way off 692 s = "\u094D"; 693 } 694 String name = UCharacter.getName(s, " + "); 695 if (name == null) { 696 name = "[no name]"; 697 } 698 String ch = s.equals("\u0F39") ? "\uFFFD" : s; 699 t.addRow() 700 .addCell(linkedScriptName) 701 .addCell((SHOW_BACKGROUND ? "<span class='ybg'>" : "") + 702 TransliteratorUtilities.toHTML.transform(ch) 703 + (SHOW_BACKGROUND ? "</span>" : "")) 704 .addCell(Utility.hex(s, 4, " + ")) 705 .addCell(name) 706 .addCell(keyboardsString) 707 .finishRow(); 708 } 709 inScript.remove(s); 710 last = s; 711 } 712 if (inScript.size() != 0 && script != UScript.UNKNOWN) { 713 // String pattern; 714 // if (inScript.size() < 255 || inScript.size()*4 < fullScriptSize) { 715 // } else { 716 // fullScript.removeAll(inScript); 717 // inScript = new UnicodeSet("[[:sc=" + UScript.getShortName(script) + ":]-" + 718 // fullScript.toPattern(false) + "]"); 719 // } 720 t.addRow() 721 .addCell(linkedScriptName) 722 .addCell("") 723 .addCell(String.valueOf(inScript.size())) 724 .addCell("missing (NFKC)!") 725 .addCell(safeUnicodeSet(inScript)) 726 .finishRow(); 727 } 728 } 729 t.addRow() 730 .addCell("") 731 .addCell("") 732 .addCell(String.valueOf(missingScripts.size())) 733 .addCell("missing scripts!") 734 .addCell(missingScripts.toString()) 735 .finishRow(); 736 pw.println(t.toTable()); 737 } 738 } 739 getInfo(Id keyboardId, UnicodeSet common, CLDRFile cldrFile)740 private static String getInfo(Id keyboardId, UnicodeSet common, CLDRFile cldrFile) { 741 Counter<String> results = new Counter<>(); 742 for (String s : common) { 743 int first = s.codePointAt(0); // first char is good enough 744 results.add(UScript.getShortName(UScript.getScript(first)), 1); 745 } 746 results.remove("Zyyy"); 747 results.remove("Zinh"); 748 results.remove("Zzzz"); 749 750 if (cldrFile != null) { 751 UnicodeSet exemplars = new UnicodeSet(cldrFile.getExemplarSet("", WinningChoice.WINNING)); 752 UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", WinningChoice.WINNING); 753 if (auxExemplars != null) { 754 exemplars.addAll(auxExemplars); 755 } 756 UnicodeSet punctuationExemplars = cldrFile.getExemplarSet("punctuation", WinningChoice.WINNING); 757 if (punctuationExemplars != null) { 758 exemplars.addAll(punctuationExemplars); 759 } 760 exemplars.addAll(getNumericExemplars(cldrFile)); 761 exemplars.addAll(getQuotationMarks(cldrFile)); 762 exemplars.add(" "); 763 addComparison(keyboardId, common, exemplars, results); 764 } 765 StringBuilder b = new StringBuilder(); 766 for (String entry : results.keySet()) { 767 if (b.length() != 0) { 768 b.append(", "); 769 } 770 b.append(entry).append(":").append(results.get(entry)); 771 } 772 return b.toString(); 773 } 774 addComparison(Id keyboardId, UnicodeSet keyboard, UnicodeSet exemplars, Counter<String> results)775 private static void addComparison(Id keyboardId, UnicodeSet keyboard, UnicodeSet exemplars, 776 Counter<String> results) { 777 UnicodeSet common = new UnicodeSet(keyboard).retainAll(exemplars); 778 if (common.size() != 0) { 779 results.add("k∩cldr", common.size()); 780 } 781 common = new UnicodeSet(keyboard).removeAll(exemplars); 782 if (common.size() != 0) { 783 results.add("k‑cldr", common.size()); 784 if (keyboardId != null) { 785 common.remove(0, 0x7F); // don't care much about ASCII. 786 logInfo.put(Row.of("k-cldr\t" + keyboardId.getBaseLanguage(), common), keyboardId); 787 // Log.logln(keyboardId + "\tk-cldr\t" + common.toPattern(false)); 788 } 789 } 790 common = new UnicodeSet(exemplars).removeAll(keyboard).remove("ss"); 791 if (common.size() != 0) { 792 results.add("cldr‑k", common.size()); 793 if (keyboardId != null && SKIP_LOG.containsNone(common)) { 794 logInfo.put(Row.of("cldr‑k\t" + keyboardId.getBaseLanguage(), common), keyboardId); 795 // Log.logln(keyboardId + "\tcldr‑k\t" + common.toPattern(false)); 796 } 797 } 798 } 799 800 static final UnicodeSet SKIP_LOG = new UnicodeSet("[가一]").freeze(); 801 static Relation<Row.R2<String, UnicodeSet>, Id> logInfo = Relation.of(new TreeMap<Row.R2<String, UnicodeSet>, Set<Id>>(), TreeSet.class); 802 803 static class Id implements Comparable<Id> { 804 final String locale; 805 final String platform; 806 final String variant; 807 final String platformVersion; 808 Id(String input, String platformVersion)809 Id(String input, String platformVersion) { 810 int pos = input.indexOf("-t-k0-"); 811 String localeTemp = input.substring(0, pos); 812 locale = ULocale.minimizeSubtags(ULocale.forLanguageTag(localeTemp)).toLanguageTag(); 813 pos += 6; 814 int pos2 = input.indexOf('-', pos); 815 if (pos2 > 0) { 816 platform = input.substring(pos, pos2); 817 variant = input.substring(pos2 + 1); 818 } else { 819 platform = input.substring(pos); 820 variant = ""; 821 } 822 this.platformVersion = platformVersion; 823 } 824 825 @Override compareTo(Id other)826 public int compareTo(Id other) { 827 int result; 828 if (0 != (result = locale.compareTo(other.locale))) { 829 return result; 830 } 831 if (0 != (result = platform.compareTo(other.platform))) { 832 return result; 833 } 834 if (0 != (result = variant.compareTo(other.variant))) { 835 return result; 836 } 837 return 0; 838 } 839 840 @Override toString()841 public String toString() { 842 return locale + "/" + platform + "/" + variant; 843 } 844 getBaseLanguage()845 public String getBaseLanguage() { 846 int pos = locale.indexOf('-'); 847 return pos < 0 ? locale : locale.substring(0, pos); 848 } 849 } 850 851 static class IdSet { 852 Map<String, Relation<String, String>> data = new TreeMap<>(); 853 add(Id id)854 public void add(Id id) { 855 Relation<String, String> platform2variant = data.get(id.platform); 856 if (platform2variant == null) { 857 data.put(id.platform, platform2variant = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class)); 858 } 859 platform2variant.put(id.locale, id.variant); 860 } 861 addAll(Collection<Id> idSet)862 public void addAll(Collection<Id> idSet) { 863 for (Id id : idSet) { 864 add(id); 865 } 866 } 867 toString(IdSet allIds)868 public String toString(IdSet allIds) { 869 if (this.equals(allIds)) { 870 return "*"; 871 } 872 StringBuilder b = new StringBuilder(); 873 final Set<Entry<String, Relation<String, String>>> entrySet = data.entrySet(); 874 boolean first = true; 875 for (Entry<String, Relation<String, String>> entry : entrySet) { 876 if (first) { 877 first = false; 878 } else { 879 b.append(" "); 880 } 881 String key = entry.getKey(); 882 Set<Entry<String, Set<String>>> valueSet = entry.getValue().keyValuesSet(); 883 b.append(key).append(":"); 884 appendLocaleAndVariants(b, valueSet, allIds.data.get(key)); 885 } 886 return b.toString(); 887 } 888 appendLocaleAndVariants(StringBuilder b, Set<Entry<String, Set<String>>> set, Relation<String, String> relation)889 private void appendLocaleAndVariants(StringBuilder b, Set<Entry<String, Set<String>>> set, 890 Relation<String, String> relation) { 891 if (set.equals(relation.keyValuesSet())) { 892 b.append("*"); 893 return; 894 } 895 final int setSize = set.size(); 896 if (setSize > 9) { 897 b.append(setSize).append("/").append(relation.size()); 898 return; 899 } 900 final boolean isSingle = setSize == 1; 901 if (!isSingle) b.append("("); 902 boolean first = true; 903 for (Entry<String, Set<String>> item : set) { 904 if (first) { 905 first = false; 906 } else { 907 b.append("|"); 908 } 909 final String key = item.getKey(); 910 b.append(key); 911 final Set<String> variants = item.getValue(); 912 final int size = variants.size(); 913 if (size != 0) { 914 if (size == 1) { 915 String firstOne = variants.iterator().next(); 916 if (firstOne.isEmpty()) { 917 continue; // fr-CA/∅ => fr-CA 918 } 919 } 920 b.append("/"); 921 appendVariant(b, variants, relation.get(key)); 922 } 923 } 924 if (!isSingle) b.append(")"); 925 } 926 appendVariant(StringBuilder b, Set<String> set, Set<String> set2)927 private void appendVariant(StringBuilder b, Set<String> set, Set<String> set2) { 928 if (set.equals(set2)) { 929 b.append("*"); 930 return; 931 } 932 final boolean isSingle = set.size() == 1; 933 if (!isSingle) b.append("("); 934 boolean first = true; 935 for (String item : set) { 936 if (first) { 937 first = false; 938 } else { 939 b.append("|"); 940 } 941 b.append(item.isEmpty() ? "∅" : item); 942 } 943 if (!isSingle) b.append(")"); 944 } 945 isEquals(Object other)946 public boolean isEquals(Object other) { 947 return data.equals(((IdSet) other).data); 948 } 949 950 @Override hashCode()951 public int hashCode() { 952 return data.hashCode(); 953 } 954 } 955 956 // public static class Key { 957 // Iso iso; 958 // ModifierSet modifierSet; 959 // } 960 // /** 961 // * Return all possible results. Could be external utility. WARNING: doesn't account for transform='no' or 962 // failure='omit'. 963 // */ 964 // public Map<String,List<Key>> getPossibleSource() { 965 // Map<String,List<Key>> results = new HashMap<String,List<Key>>(); 966 // UnicodeSet results = new UnicodeSet(); 967 // addOutput(getBaseMap().iso2output.values(), results); 968 // for (KeyMap keymap : getKeyMaps()) { 969 // addOutput(keymap.string2output.values(), results); 970 // } 971 // for (Transforms transforms : getTransforms().values()) { 972 // // loop, to catch empty case 973 // for (String result : transforms.string2string.values()) { 974 // if (!result.isEmpty()) { 975 // results.add(result); 976 // } 977 // } 978 // } 979 // return results; 980 // } 981 getQuotationMarks(CLDRFile file)982 static UnicodeSet getQuotationMarks(CLDRFile file) { 983 UnicodeSet results = new UnicodeSet(); 984 // TODO should have a test to make sure these are in exemplars. 985 results.add(file.getStringValue("//ldml/delimiters/quotationEnd")); 986 results.add(file.getStringValue("//ldml/delimiters/quotationStart")); 987 results.add(file.getStringValue("//ldml/delimiters/alternateQuotationEnd")); 988 results.add(file.getStringValue("//ldml/delimiters/alternateQuotationStart")); 989 return results; 990 } 991 992 // TODO Add as utility to CLDRFile getNumericExemplars(CLDRFile file)993 static UnicodeSet getNumericExemplars(CLDRFile file) { 994 UnicodeSet results = new UnicodeSet(); 995 String defaultNumberingSystem = file.getStringValue("//ldml/numbers/defaultNumberingSystem"); 996 String nativeNumberingSystem = file.getStringValue("//ldml/numbers/otherNumberingSystems/native"); 997 // "//ldml/numbers/otherNumberingSystems/native" 998 addNumberingSystem(file, results, "latn"); 999 if (!defaultNumberingSystem.equals("latn")) { 1000 addNumberingSystem(file, results, defaultNumberingSystem); 1001 } 1002 if (!nativeNumberingSystem.equals("latn") && !nativeNumberingSystem.equals(defaultNumberingSystem)) { 1003 addNumberingSystem(file, results, nativeNumberingSystem); 1004 } 1005 return results; 1006 } 1007 addNumberingSystem(CLDRFile file, UnicodeSet results, String numberingSystem)1008 public static void addNumberingSystem(CLDRFile file, UnicodeSet results, String numberingSystem) { 1009 String digits = supplementalDataInfo.getDigits(numberingSystem); 1010 results.addAll(digits); 1011 addSymbol(file, numberingSystem, "decimal", results); 1012 addSymbol(file, numberingSystem, "group", results); 1013 addSymbol(file, numberingSystem, "minusSign", results); 1014 addSymbol(file, numberingSystem, "percentSign", results); 1015 addSymbol(file, numberingSystem, "plusSign", results); 1016 } 1017 addSymbol(CLDRFile file, String numberingSystem, String key, UnicodeSet results)1018 public static void addSymbol(CLDRFile file, String numberingSystem, String key, UnicodeSet results) { 1019 String symbol = file.getStringValue("//ldml/numbers/symbols[@numberSystem=\"" + numberingSystem + "\"]/" + 1020 key); 1021 results.add(symbol); 1022 } 1023 } 1024