1 package org.unicode.cldr.tool; 2 3 import java.io.IOException; 4 import java.io.PrintWriter; 5 import java.util.BitSet; 6 import java.util.Collection; 7 import java.util.HashMap; 8 import java.util.HashSet; 9 import java.util.LinkedHashSet; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeMap; 15 import java.util.TreeSet; 16 import java.util.regex.Matcher; 17 18 import org.unicode.cldr.draft.FileUtilities; 19 import org.unicode.cldr.draft.Keyboard; 20 import org.unicode.cldr.draft.Keyboard.Gesture; 21 import org.unicode.cldr.draft.Keyboard.Iso; 22 import org.unicode.cldr.draft.Keyboard.KeyMap; 23 import org.unicode.cldr.draft.Keyboard.Output; 24 import org.unicode.cldr.draft.Keyboard.TransformStatus; 25 import org.unicode.cldr.draft.Keyboard.TransformType; 26 import org.unicode.cldr.draft.Keyboard.Transforms; 27 import org.unicode.cldr.draft.KeyboardModifierSet; 28 import org.unicode.cldr.tool.Option.Options; 29 import org.unicode.cldr.util.CLDRConfig; 30 import org.unicode.cldr.util.CLDRFile; 31 import org.unicode.cldr.util.CLDRFile.WinningChoice; 32 import org.unicode.cldr.util.CLDRPaths; 33 import org.unicode.cldr.util.CLDRTool; 34 import org.unicode.cldr.util.CldrUtility; 35 import org.unicode.cldr.util.Counter; 36 import org.unicode.cldr.util.Factory; 37 import org.unicode.cldr.util.FileCopier; 38 import org.unicode.cldr.util.LanguageTagCanonicalizer; 39 import org.unicode.cldr.util.Log; 40 import org.unicode.cldr.util.PatternCache; 41 import org.unicode.cldr.util.SupplementalDataInfo; 42 import org.unicode.cldr.util.TransliteratorUtilities; 43 import org.unicode.cldr.util.UnicodeSetPrettyPrinter; 44 45 import com.ibm.icu.dev.util.CollectionUtilities; 46 import com.ibm.icu.impl.Relation; 47 import com.ibm.icu.impl.Row; 48 import com.ibm.icu.impl.Row.R2; 49 import com.ibm.icu.impl.Row.R3; 50 import com.ibm.icu.impl.Utility; 51 import com.ibm.icu.lang.UCharacter; 52 import com.ibm.icu.lang.UProperty; 53 import com.ibm.icu.lang.UScript; 54 import com.ibm.icu.text.Collator; 55 import com.ibm.icu.text.Transliterator; 56 import com.ibm.icu.text.UnicodeSet; 57 import com.ibm.icu.util.ICUUncheckedIOException; 58 import com.ibm.icu.util.ULocale; 59 60 @CLDRTool(alias = "showkeyboards", description = "Generate keyboard charts") 61 public class ShowKeyboards { 62 // TODO - fix ' > xxx 63 // TODO - check for bad locale ids 64 65 private static final String ABOUT_KEYBOARD_CHARTS = "<p>For more information, see <a target='ABOUT_KB' href='http://cldr.unicode.org/index/charts/keyboards'>About Keyboard Charts</a>.</p>"; 66 private static String keyboardChartDir; 67 private static String keyboardChartLayoutsDir; 68 static final CLDRConfig testInfo = ToolConfig.getToolInstance(); 69 static final Factory factory = testInfo.getCldrFactory(); 70 71 static final boolean SHOW_BACKGROUND = false; 72 73 final static Options myOptions = new Options(); 74 75 enum MyOptions { 76 idFilter(".+", ".*", "Filter the information based on id, using a regex argument."), sourceDirectory(".+", CLDRPaths.BASE_DIRECTORY + "keyboards/", 77 "The source directory. CURRENTLY CAN’T BE CHANGED!!"), targetDirectory(".+", CLDRPaths.CHART_DIRECTORY + "keyboards/", 78 "The target directory."), layouts(null, null, 79 "Only create html files for keyboard layouts"), repertoire(null, null, "Only create html files for repertoire"),; 80 // boilerplate 81 final Option option; 82 MyOptions(String argumentPattern, String defaultArgument, String helpText)83 MyOptions(String argumentPattern, String defaultArgument, String helpText) { 84 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 85 } 86 } 87 88 static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance(); 89 90 // ********************************************* 91 // Temporary, for some simple testing 92 // ********************************************* main(String[] args)93 public static void main(String[] args) throws IOException { 94 myOptions.parse(MyOptions.idFilter, args, true); 95 String idPattern = MyOptions.idFilter.option.getValue(); 96 keyboardChartDir = MyOptions.targetDirectory.option.getValue(); 97 keyboardChartLayoutsDir = keyboardChartDir + "/layouts/"; 98 99 FileCopier.ensureDirectoryExists(keyboardChartDir); 100 FileCopier.copy(ShowKeyboards.class, "keyboards-index.html", keyboardChartDir, "index.html"); 101 102 Matcher idMatcher = PatternCache.get(idPattern).matcher(""); 103 try { 104 Log.setLog(CLDRPaths.LOG_DIRECTORY + "keyboard-log.txt"); 105 } catch (IOException e) { 106 throw new ICUUncheckedIOException(e); 107 } 108 boolean layoutsOnly = MyOptions.layouts.option.doesOccur(); 109 boolean repertoireOnly = MyOptions.repertoire.option.doesOccur(); 110 111 if (!repertoireOnly) { 112 showHtml(idMatcher); 113 } 114 if (!layoutsOnly) { 115 showRepertoire(idMatcher); 116 } 117 } 118 showRepertoire(Matcher idMatcher)119 public static void showRepertoire(Matcher idMatcher) { 120 Set<Exception> totalErrors = new LinkedHashSet<Exception>(); 121 Set<Exception> errors = new LinkedHashSet<Exception>(); 122 UnicodeSet controls = new UnicodeSet("[:Cc:]").freeze(); 123 // check what the characters are, excluding controls. 124 Map<Id, UnicodeSet> id2unicodeset = new TreeMap<Id, UnicodeSet>(); 125 Set<String> totalModifiers = new LinkedHashSet<String>(); 126 Relation<String, Id> locale2ids = Relation.of(new TreeMap<String, Set<Id>>(), TreeSet.class); 127 LanguageTagCanonicalizer canonicalizer = new LanguageTagCanonicalizer(); 128 IdInfo idInfo = new IdInfo(); 129 for (String platformId : Keyboard.getPlatformIDs()) { 130 //Platform p = Keyboard.getPlatform(platformId); 131 // System.out.println(platformId + "\t" + p.getHardwareMap()); 132 for (String keyboardId : Keyboard.getKeyboardIDs(platformId)) { 133 if (!idMatcher.reset(keyboardId).matches()) { 134 continue; 135 } 136 Keyboard keyboard = Keyboard.getKeyboard(platformId, keyboardId, errors); 137 for (Exception error : errors) { 138 totalErrors.add(new IllegalArgumentException(keyboardId, error)); 139 } 140 UnicodeSet unicodeSet = keyboard.getPossibleResults().removeAll(controls); 141 final Id id = new Id(keyboardId, keyboard.getPlatformVersion()); 142 idInfo.add(id, unicodeSet); 143 String canonicalLocale = canonicalizer.transform(id.locale).replace('_', '-'); 144 if (!id.locale.equals(canonicalLocale)) { 145 totalErrors.add(new IllegalArgumentException("Non-canonical id: " + id.locale + "\t=>\t" + canonicalLocale)); 146 } 147 id2unicodeset.put(id, unicodeSet.freeze()); 148 locale2ids.put(id.locale, id); 149 System.out.println(id.toString().replace('/', '\t') + "\t" + keyboard.getNames()); 150 for (KeyMap keymap : keyboard.getKeyMaps()) { 151 totalModifiers.add(keymap.getModifiers().toString()); 152 } 153 } 154 } 155 if (totalErrors.size() != 0) { 156 System.out.println("Errors\t" + CollectionUtilities.join(totalErrors, System.lineSeparator() + "\t")); 157 } 158 for (String item : totalModifiers) { 159 System.out.println(item); 160 } 161 // logInfo.put(Row.of("k-cldr",common), keyboardId); 162 try { 163 FileCopier.copy(ShowKeyboards.class, "keyboards.css", keyboardChartDir, "index.css"); 164 PrintWriter out = FileUtilities.openUTF8Writer(keyboardChartDir, "chars2keyboards.html"); 165 String[] headerAndFooter = new String[2]; 166 167 ShowData.getChartTemplate( 168 "Characters → Keyboards", 169 ToolConstants.CHART_DISPLAY_VERSION, 170 "", 171 headerAndFooter, null, false); 172 out.println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS); 173 174 // printTop("Characters → Keyboards", out); 175 idInfo.print(out); 176 // printBottom(out); 177 out.println(headerAndFooter[1]); 178 out.close(); 179 180 out = FileUtilities.openUTF8Writer(keyboardChartDir, "keyboards2chars.html"); 181 ShowData.getChartTemplate( 182 "Keyboards → Characters", 183 ToolConstants.CHART_DISPLAY_VERSION, 184 "", 185 headerAndFooter, null, false); 186 out.println(headerAndFooter[0] 187 + ABOUT_KEYBOARD_CHARTS); 188 // printTop("Keyboards → Characters", out); 189 showLocaleToCharacters(out, id2unicodeset, locale2ids); 190 // printBottom(out); 191 out.println(headerAndFooter[1]); 192 out.close(); 193 } catch (IOException e1) { 194 e1.printStackTrace(); 195 } 196 for (Entry<R2<String, UnicodeSet>, Set<Id>> entry : logInfo.keyValuesSet()) { 197 IdSet idSet = new IdSet(); 198 idSet.addAll(entry.getValue()); 199 Log.logln(entry.getKey().get0() + "\t" + entry.getKey().get1().toPattern(false) + "\t" 200 + idSet.toString(idInfo.allIds)); 201 } 202 Log.close(); 203 } 204 showHtml(Matcher idMatcher)205 private static void showHtml(Matcher idMatcher) throws IOException { 206 Set<Exception> errors = new LinkedHashSet<Exception>(); 207 Relation<String, Row.R3<String, String, String>> locale2keyboards = Relation.of( 208 new TreeMap<String, Set<Row.R3<String, String, String>>>(), TreeSet.class); 209 Map<String, String> localeIndex = new TreeMap<String, String>(); 210 211 for (String platformId : Keyboard.getPlatformIDs()) { 212 //Platform p = Keyboard.getPlatform(platformId); 213 // System.out.println(platformId + "\t" + p.getHardwareMap()); 214 for (String keyboardId : Keyboard.getKeyboardIDs(platformId)) { 215 if (!idMatcher.reset(keyboardId).matches()) { 216 continue; 217 } 218 String baseLocale = keyboardId.substring(0, keyboardId.indexOf('-')); 219 String locale = keyboardId.substring(0, keyboardId.indexOf("-t-")); 220 locale2keyboards.put(baseLocale, Row.of(platformId, locale, keyboardId)); 221 222 final String localeName = testInfo.getEnglish().getName(baseLocale, true); 223 localeIndex.put(localeName, baseLocale); 224 } 225 } 226 227 FileCopier.ensureDirectoryExists(keyboardChartLayoutsDir); 228 FileCopier.copy(ShowKeyboards.class, "keyboards.css", keyboardChartLayoutsDir, "index.css"); 229 PrintWriter index = FileUtilities.openUTF8Writer(keyboardChartLayoutsDir, "index.html"); 230 String[] headerAndFooter = new String[2]; 231 ShowData.getChartTemplate( 232 "Keyboard Layout Index", 233 ToolConstants.CHART_DISPLAY_VERSION, 234 "", 235 headerAndFooter, "Keyboard Index", false); 236 index 237 .println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS); 238 // printTop("Keyboard Layout Index", index); 239 index.println("<ol>"); 240 for (Entry<String, String> entry : localeIndex.entrySet()) { 241 index.println("<li><a href='" + entry.getValue() + ".html'>" 242 + entry.getKey() + "</a>" 243 + " [" + entry.getValue() + "]" + 244 "</li>"); 245 } 246 index.println("</ol>"); 247 index.println(headerAndFooter[1]); 248 // printBottom(index); 249 index.close(); 250 // FileUtilities.copyFile(ShowKeyboards.class, "keyboards.css", keyboardChartLayoutsDir); 251 252 for (Entry<String, Set<R3<String, String, String>>> localeKeyboards : locale2keyboards.keyValuesSet()) { 253 String locale = localeKeyboards.getKey(); 254 final String localeName = testInfo.getEnglish().getName(locale); 255 256 // String localeNameString = localeName.replace(' ', '_').toLowerCase(Locale.ENGLISH); 257 PrintWriter out = FileUtilities.openUTF8Writer(keyboardChartLayoutsDir, locale + ".html"); 258 ShowData.getChartTemplate( 259 "Layouts: " + localeName + " (" + locale + ")", 260 ToolConstants.CHART_DISPLAY_VERSION, 261 "", 262 headerAndFooter, null, false); 263 out.println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS); 264 // printTop("Layouts: " + localeName + " (" + locale + ")", out); 265 Set<R3<String, String, String>> keyboards = localeKeyboards.getValue(); 266 for (R3<String, String, String> platformKeyboard : keyboards) { 267 String platformId = platformKeyboard.get0(); 268 String keyboardId = platformKeyboard.get2(); 269 // System.out.println(platformId + "\t" + p.getHardwareMap()); 270 Keyboard keyboard = Keyboard.getKeyboard(platformId, keyboardId, errors); 271 showErrors(errors); 272 Set<String> names = keyboard.getNames(); 273 String platformFromKeyboardId = Keyboard.getPlatformId(keyboardId); 274 String printId = platformId.equals(platformFromKeyboardId) ? keyboardId : keyboardId + "/und"; 275 out.println("<h2>" + CldrUtility.getDoubleLinkedText(printId, printId) 276 + (names.size() == 0 ? "" : " " + names) 277 + "</h2>"); 278 279 Transforms transforms = keyboard.getTransforms().get(TransformType.SIMPLE); 280 281 out.println("<table class='keyboards'><tr>"); 282 for (KeyMap map : keyboard.getKeyMaps()) { 283 KeyboardModifierSet mods = map.getModifiers(); 284 out.println("<td class='keyboardTD'><table class='keyboard'>"); 285 // KeyboardModifierSet modifiers = map.getModifiers(); 286 Map<Iso, Output> isoMap = map.getIso2Output(); 287 for (Keyboard.IsoRow row : Keyboard.IsoRow.values()) { 288 out.println("<tr>"); 289 for (Iso isoValue : Iso.values()) { 290 if (isoValue.isoRow != row) { 291 continue; 292 } 293 Output output = isoMap.get(isoValue); 294 if (output == null) { 295 out.println("<td class='x'> </td>"); 296 continue; 297 } 298 String chars = output.getOutput(); 299 TransformStatus transformStatus = output.getTransformStatus(); 300 StringBuilder hover = new StringBuilder(); 301 if (transformStatus == TransformStatus.DEFAULT && transforms != null) { 302 Map<String, String> map2 = transforms.getMatch(chars); 303 add(map2, hover); 304 } 305 Map<Gesture, List<String>> gestures = output.getGestures(); 306 if (!gestures.isEmpty()) { 307 add(gestures, hover); 308 } 309 final String longPress = hover.length() == 0 ? "" 310 : " title='" + hover + "'"; 311 out.println("<td class='" + (hover.length() == 0 ? 'm' : 'h') + 312 "'" + longPress + ">" 313 + toSafeHtml(chars) + "</td>"); 314 } 315 out.println("</tr>"); 316 } 317 String modsString = mods.getShortInput(); 318 if (modsString.isEmpty()) { 319 modsString = "\u00A0"; 320 } else if (modsString.length() > 20) { 321 modsString = modsString.substring(0, 20) + "…"; 322 } 323 out.println("</table><span class='modifiers'>" 324 + TransliteratorUtilities.toHTML.transform(modsString) + 325 "</span></td>"); 326 } 327 out.println("</tr></table>"); 328 } 329 index.println(headerAndFooter[1]); 330 // printBottom(out); 331 out.close(); 332 } 333 System.out.println("Failing Invisibles: " + FAILING_INVISIBLE.retainAll(INVISIBLE)); 334 } 335 showErrors(Set<Exception> errors)336 private static void showErrors(Set<Exception> errors) { 337 for (Exception error : errors) { 338 String title = error.getMessage().contains("No minimal data for") ? "Warning" : "Error"; 339 System.out.println("\t*" + title + ":\t" + error); 340 } 341 } 342 343 static Transliterator TO_SAFE_HTML; 344 static { 345 StringBuilder rules = new StringBuilder(TransliteratorUtilities.toHTML.toRules(false)); 346 for (char i = 0; i < 0x20; ++i) { String.valueOf(i)347 addRule(String.valueOf(i), "^" + String.valueOf((char) (i + 0x40)), rules); 348 } 349 String[][] map = { 350 // {"\u0020","sp"}, 351 { "\u007F", "del" }, 352 { "\u00A0", "nbsp" }, 353 { "\u00AD", "shy" }, 354 { "\u200B", "zwsp" }, 355 { "\u200C", "zwnj" }, 356 { "\u200D", "zwj" }, 357 { "\u200E", "lrm" }, 358 { "\u200F", "rlm" }, 359 { "\u202F", "nnbs" }, 360 { "\uFEFF", "bom" }, 361 { "\u180B", "mvs1" }, 362 { "\u180C", "mvs2" }, 363 { "\u180D", "mvs3" }, 364 { "\u180E", "mvs" }, 365 // {"\uF8FF","appl"}, 366 }; 367 for (String[] items : map) { 368 final String fromItem = items[0]; 369 final String toItem = items[1]; addRule(fromItem, toItem, rules)370 addRule(fromItem, toItem, rules); 371 } 372 TO_SAFE_HTML = Transliterator.createFromRules("none", rules.toString(), Transliterator.FORWARD); 373 } 374 addRule(final String fromItem, final String toItem, StringBuilder rules)375 public static void addRule(final String fromItem, final String toItem, StringBuilder rules) { 376 rules.append("'" 377 + fromItem 378 + "'>" 379 + "'<span class=\"cc\">" 380 + toItem 381 + "</span>'" 382 + ";" 383 + System.lineSeparator()); 384 } 385 386 static UnicodeSet INVISIBLE = new UnicodeSet("[[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]-[\\u0020]]").freeze(); 387 static UnicodeSet FAILING_INVISIBLE = new UnicodeSet(); 388 toSafeHtml(Object hover)389 public static String toSafeHtml(Object hover) { 390 String result = TO_SAFE_HTML.transform(hover.toString()); 391 if (INVISIBLE.containsSome(result)) { 392 FAILING_INVISIBLE.addAll(result); 393 } 394 return result; 395 } 396 add(Map<K, V> map2, StringBuilder hover)397 private static <K, V> void add(Map<K, V> map2, StringBuilder hover) { 398 if (!map2.isEmpty()) { 399 for (Entry<K, V> entry : map2.entrySet()) { 400 if (hover.length() != 0) { 401 hover.append("; "); 402 } 403 final K key = entry.getKey(); 404 String keyString = key == Gesture.LONGPRESS ? "LP" : key.toString(); 405 final V value = entry.getValue(); 406 String valueString = value instanceof Collection 407 ? CollectionUtilities.join((Collection) value, " ") 408 : value.toString(); 409 hover.append(TransliteratorUtilities.toHTML.transform(keyString)).append("→") 410 .append(TransliteratorUtilities.toHTML.transform(valueString)); 411 } 412 } 413 } 414 415 // public static void printTop(String title, PrintWriter out) { 416 // out.println( 417 // "<html>\n" + 418 // "<head>\n" + 419 // "<meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/>\n" + 420 // "<link rel='stylesheet' type='text/css' href='keyboards.css'>" + 421 // "<title>" + title + "</title>\n" + 422 // "</head>\n" + 423 // "<body>\n" + 424 // "<h1>DRAFT " + 425 // title + 426 // "</h1>\n" + 427 // "<p>For more information, see <a href='http://cldr.unicode.org/index/charts/keyboards'>Keyboard Charts</a>.</p>" 428 // ); 429 // } 430 // 431 // public static void printBottom(PrintWriter pw) { 432 // pw.println( 433 // "</body>\n" + 434 // "</html>" 435 // ); 436 // } 437 showLocaleToCharacters(PrintWriter out, Map<Id, UnicodeSet> id2unicodeset, Relation<String, Id> locale2ids)438 public static void showLocaleToCharacters(PrintWriter out, Map<Id, UnicodeSet> id2unicodeset, 439 Relation<String, Id> locale2ids) { 440 441 TablePrinter t = new TablePrinter() 442 .addColumn("Name").setSpanRows(true).setBreakSpans(true).setSortPriority(0) 443 .setCellAttributes("class='cell'") 444 .addColumn("Locale").setSpanRows(true).setBreakSpans(true).setCellAttributes("class='cell'") 445 .addColumn("Platform").setSpanRows(true).setCellAttributes("class='cell'") 446 .addColumn("Variant").setCellAttributes("class='cell'") 447 .addColumn("Script").setCellAttributes("class='cell'") 448 .addColumn("Statistics").setCellAttributes("class='cell'") 449 .addColumn("Characters").setSpanRows(true).setCellAttributes("class='cell'"); 450 451 Map<String, UnicodeSet> commonSets = new HashMap<String, UnicodeSet>(); 452 Counter<String> commonCount = new Counter<String>(); 453 Set<String> commonDone = new HashSet<String>(); 454 455 for (Entry<String, Set<Id>> localeAndIds : locale2ids.keyValuesSet()) { 456 final String key = localeAndIds.getKey(); 457 final Set<Id> keyboardIds = localeAndIds.getValue(); 458 459 // System.out.println(); 460 final String localeName = testInfo.getEnglish().getName(key, true); 461 final String linkedLocaleName = CldrUtility.getDoubleLinkedText(key, localeName); 462 final ULocale uLocale = ULocale.forLanguageTag(key); 463 String script = uLocale.getScript(); 464 String writtenLanguage = uLocale.getLanguage() + (script.isEmpty() ? "" : "_" + script); 465 CLDRFile cldrFile = null; 466 try { 467 cldrFile = factory.make(writtenLanguage, true); 468 } catch (Exception e) { 469 } 470 471 // final String heading = uLocale.getDisplayName(ULocale.ENGLISH) 472 // + "\t" + ULocale.addLikelySubtags(uLocale).getScript() 473 // + "\t"; 474 UnicodeSet common = UnicodeSet.EMPTY; 475 final String likelyScript = ULocale.addLikelySubtags(uLocale).getScript(); 476 commonCount.clear(); 477 for (String platform : Keyboard.getPlatformIDs()) { 478 commonSets.put(platform, UnicodeSet.EMPTY); 479 } 480 if (keyboardIds.size() > 1) { 481 common = UnicodeSet.EMPTY; 482 for (Id keyboardId : keyboardIds) { 483 final UnicodeSet keyboardSet = id2unicodeset.get(keyboardId); 484 if (common == UnicodeSet.EMPTY) { 485 common = new UnicodeSet(keyboardSet); 486 } else { 487 common.retainAll(keyboardSet); 488 } 489 UnicodeSet platformCommon = commonSets.get(keyboardId.platform); 490 commonCount.add(keyboardId.platform, 1); 491 if (platformCommon == UnicodeSet.EMPTY) { 492 commonSets.put(keyboardId.platform, new UnicodeSet(keyboardSet)); 493 } else { 494 platformCommon.retainAll(keyboardSet); 495 } 496 } 497 common.freeze(); 498 t.addRow() 499 .addCell(linkedLocaleName) // name 500 .addCell(key) // locale 501 .addCell("ALL") // platform 502 .addCell("COMMON") // variant 503 .addCell(likelyScript) // script 504 .addCell(getInfo(null, common, cldrFile)) // stats 505 .addCell(safeUnicodeSet(common)) // characters 506 .finishRow(); 507 508 // System.out.println( 509 // locale + "\tCOMMON\t\t-" 510 // + "\t" + heading + getInfo(common, cldrFile) 511 // + "\t" + common.toPattern(false)); 512 } 513 commonDone.clear(); 514 for (Id keyboardId : keyboardIds) { 515 UnicodeSet platformCommon = commonSets.get(keyboardId.platform); 516 if (!commonDone.contains(keyboardId.platform)) { 517 commonDone.add(keyboardId.platform); 518 if (commonCount.get(keyboardId.platform) <= 1) { 519 platformCommon = UnicodeSet.EMPTY; 520 commonSets.put(keyboardId.platform, platformCommon); 521 } else if (platformCommon.size() > 0) { 522 // get stats for all, but otherwise remove common. 523 final String stats = getInfo(null, platformCommon, cldrFile); 524 platformCommon.removeAll(common).freeze(); 525 commonSets.put(keyboardId.platform, platformCommon); 526 t.addRow() 527 .addCell(linkedLocaleName) // name 528 .addCell(key) // locale 529 .addCell(keyboardId.platform) // platform 530 .addCell("COMMON") // variant 531 .addCell(likelyScript) // script 532 .addCell(stats) // stats 533 .addCell(safeUnicodeSet(platformCommon)) // characters 534 .finishRow(); 535 } 536 } 537 final UnicodeSet current2 = id2unicodeset.get(keyboardId); 538 final UnicodeSet remainder = new UnicodeSet(current2) 539 .removeAll(common) 540 .removeAll(platformCommon); 541 542 t.addRow() 543 .addCell(linkedLocaleName) // name 544 .addCell(key) // locale 545 .addCell(keyboardId.platform) // platform 546 .addCell(keyboardId.variant) // variant 547 .addCell(likelyScript) // script 548 .addCell(getInfo(keyboardId, current2, cldrFile)) // stats 549 .addCell(safeUnicodeSet(remainder)) // characters 550 .finishRow(); 551 // System.out.println( 552 // keyboardId.toString().replace('/','\t') 553 // + "\t" + keyboardId.platformVersion 554 // + "\t" + heading + getInfo(current2, cldrFile) 555 // + "\t" + remainder.toPattern(false)); 556 } 557 } 558 out.println(t.toTable()); 559 } 560 561 static UnicodeSetPrettyPrinter prettyPrinter = new UnicodeSetPrettyPrinter() 562 .setOrdering(Collator.getInstance(ULocale.ROOT)) 563 .setSpaceComparator(Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY)); 564 safeUnicodeSet(UnicodeSet unicodeSet)565 public static String safeUnicodeSet(UnicodeSet unicodeSet) { 566 return TransliteratorUtilities.toHTML.transform(prettyPrinter.format(unicodeSet)); 567 } 568 569 static class IdInfo { 570 final Collator collator = Collator.getInstance(ULocale.ENGLISH); 571 BitSet bitset = new BitSet(); 572 BitSet bitset2 = new BitSet(); 573 @SuppressWarnings("unchecked") 574 TreeMap<String, IdSet>[] charToKeyboards = new TreeMap[UScript.CODE_LIMIT]; 575 { 576 collator.setStrength(Collator.IDENTICAL); 577 for (int i = 0; i < charToKeyboards.length; ++i) { 578 charToKeyboards[i] = new TreeMap<String, IdSet>(collator); 579 } 580 } 581 IdSet allIds = new IdSet(); 582 add(Id id, UnicodeSet unicodeSet)583 public void add(Id id, UnicodeSet unicodeSet) { 584 allIds.add(id); 585 for (String s : unicodeSet) { 586 int script = getScriptExtensions(s, bitset); 587 if (script >= 0) { 588 addToScript(script, id, s); 589 } else { 590 for (int script2 = bitset.nextSetBit(0); script2 >= 0; script2 = bitset.nextSetBit(script2 + 1)) { 591 addToScript(script2, id, s); 592 } 593 } 594 } 595 } 596 getScriptExtensions(String s, BitSet outputBitset)597 public int getScriptExtensions(String s, BitSet outputBitset) { 598 final int firstCodePoint = s.codePointAt(0); 599 int result = UScript.getScriptExtensions(firstCodePoint, outputBitset); 600 final int firstCodePointCount = Character.charCount(firstCodePoint); 601 if (s.length() == firstCodePointCount) { 602 return result; 603 } 604 for (int i = firstCodePointCount; i < s.length();) { 605 int ch = s.codePointAt(i); 606 UScript.getScriptExtensions(ch, bitset2); 607 outputBitset.or(bitset2); 608 i += Character.charCount(ch); 609 } 610 // remove inherited, if there is anything else; then remove common if there is anything else 611 int cardinality = outputBitset.cardinality(); 612 if (cardinality > 1) { 613 if (outputBitset.get(UScript.INHERITED)) { 614 outputBitset.clear(UScript.INHERITED); 615 --cardinality; 616 } 617 if (cardinality > 1) { 618 if (outputBitset.get(UScript.COMMON)) { 619 outputBitset.clear(UScript.COMMON); 620 --cardinality; 621 } 622 } 623 } 624 if (cardinality == 1) { 625 return outputBitset.nextSetBit(0); 626 } else { 627 return -cardinality; 628 } 629 } 630 addToScript(int script, Id id, String s)631 public void addToScript(int script, Id id, String s) { 632 TreeMap<String, IdSet> charToKeyboard = charToKeyboards[script]; 633 IdSet idSet = charToKeyboard.get(s); 634 if (idSet == null) { 635 charToKeyboard.put(s, idSet = new IdSet()); 636 } 637 idSet.add(id); 638 } 639 print(PrintWriter pw)640 public void print(PrintWriter pw) { 641 642 TablePrinter t = new TablePrinter() 643 .addColumn("Script").setSpanRows(true).setCellAttributes("class='s'") 644 .addColumn("Char").setCellAttributes("class='ch'") 645 .addColumn("Code").setCellAttributes("class='c'") 646 .addColumn("Name").setCellAttributes("class='n'") 647 .addColumn("Keyboards").setSpanRows(true).setCellAttributes("class='k'"); 648 Set<String> missingScripts = new TreeSet<String>(); 649 UnicodeSet notNFKC = new UnicodeSet("[:nfkcqc=n:]"); 650 UnicodeSet COMMONINHERITED = new UnicodeSet("[[:sc=common:][:sc=inherited:]]"); 651 652 for (int script = 0; script < charToKeyboards.length; ++script) { 653 UnicodeSet inScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script).removeAll( 654 notNFKC); 655 656 // UnicodeSet fullScript = new UnicodeSet(inScript); 657 // int fullScriptSize = inScript.size(); 658 if (inScript.size() == 0) { 659 continue; 660 } 661 final TreeMap<String, IdSet> charToKeyboard = charToKeyboards[script]; 662 final String scriptName = UScript.getName(script); 663 final String linkedScriptName = CldrUtility.getDoubleLinkedText(UScript.getShortName(script), 664 scriptName); 665 if (charToKeyboard.size() == 0) { 666 missingScripts.add(scriptName); 667 continue; 668 } 669 670 // also check to see that at least one item is not all common 671 check: if (script != UScript.COMMON && script != UScript.INHERITED) { 672 for (String s : charToKeyboard.keySet()) { 673 if (!COMMONINHERITED.containsAll(s)) { 674 break check; 675 } 676 } 677 missingScripts.add(scriptName); 678 continue; 679 } 680 681 String last = ""; 682 for (Entry<String, IdSet> entry : charToKeyboard.entrySet()) { 683 String s = entry.getKey(); 684 IdSet value = entry.getValue(); 685 final String keyboardsString = value.toString(allIds); 686 if (!s.equalsIgnoreCase(last)) { 687 if (s.equals("\u094D\u200C")) { // Hack, because the browsers width is way off 688 s = "\u094D"; 689 } 690 String name = UCharacter.getName(s, " + "); 691 if (name == null) { 692 name = "[no name]"; 693 } 694 String ch = s.equals("\u0F39") ? "\uFFFD" : s; 695 t.addRow() 696 .addCell(linkedScriptName) 697 .addCell((SHOW_BACKGROUND ? "<span class='ybg'>" : "") + 698 TransliteratorUtilities.toHTML.transform(ch) 699 + (SHOW_BACKGROUND ? "</span>" : "")) 700 .addCell(Utility.hex(s, 4, " + ")) 701 .addCell(name) 702 .addCell(keyboardsString) 703 .finishRow(); 704 } 705 inScript.remove(s); 706 last = s; 707 } 708 if (inScript.size() != 0 && script != UScript.UNKNOWN) { 709 // String pattern; 710 // if (inScript.size() < 255 || inScript.size()*4 < fullScriptSize) { 711 // } else { 712 // fullScript.removeAll(inScript); 713 // inScript = new UnicodeSet("[[:sc=" + UScript.getShortName(script) + ":]-" + 714 // fullScript.toPattern(false) + "]"); 715 // } 716 t.addRow() 717 .addCell(linkedScriptName) 718 .addCell("") 719 .addCell(String.valueOf(inScript.size())) 720 .addCell("missing (NFKC)!") 721 .addCell(safeUnicodeSet(inScript)) 722 .finishRow(); 723 } 724 } 725 t.addRow() 726 .addCell("") 727 .addCell("") 728 .addCell(String.valueOf(missingScripts.size())) 729 .addCell("missing scripts!") 730 .addCell(missingScripts.toString()) 731 .finishRow(); 732 pw.println(t.toTable()); 733 } 734 } 735 getInfo(Id keyboardId, UnicodeSet common, CLDRFile cldrFile)736 private static String getInfo(Id keyboardId, UnicodeSet common, CLDRFile cldrFile) { 737 Counter<String> results = new Counter<String>(); 738 for (String s : common) { 739 int first = s.codePointAt(0); // first char is good enough 740 results.add(UScript.getShortName(UScript.getScript(first)), 1); 741 } 742 results.remove("Zyyy"); 743 results.remove("Zinh"); 744 results.remove("Zzzz"); 745 746 if (cldrFile != null) { 747 UnicodeSet exemplars = new UnicodeSet(cldrFile.getExemplarSet("", WinningChoice.WINNING)); 748 UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", WinningChoice.WINNING); 749 if (auxExemplars != null) { 750 exemplars.addAll(auxExemplars); 751 } 752 UnicodeSet punctuationExemplars = cldrFile.getExemplarSet("punctuation", WinningChoice.WINNING); 753 if (punctuationExemplars != null) { 754 exemplars.addAll(punctuationExemplars); 755 } 756 exemplars.addAll(getNumericExemplars(cldrFile)); 757 exemplars.addAll(getQuotationMarks(cldrFile)); 758 exemplars.add(" "); 759 addComparison(keyboardId, common, exemplars, results); 760 } 761 StringBuilder b = new StringBuilder(); 762 for (String entry : results.keySet()) { 763 if (b.length() != 0) { 764 b.append(", "); 765 } 766 b.append(entry).append(":").append(results.get(entry)); 767 } 768 return b.toString(); 769 } 770 addComparison(Id keyboardId, UnicodeSet keyboard, UnicodeSet exemplars, Counter<String> results)771 private static void addComparison(Id keyboardId, UnicodeSet keyboard, UnicodeSet exemplars, 772 Counter<String> results) { 773 UnicodeSet common = new UnicodeSet(keyboard).retainAll(exemplars); 774 if (common.size() != 0) { 775 results.add("k∩cldr", common.size()); 776 } 777 common = new UnicodeSet(keyboard).removeAll(exemplars); 778 if (common.size() != 0) { 779 results.add("k‑cldr", common.size()); 780 if (keyboardId != null) { 781 common.remove(0, 0x7F); // don't care much about ASCII. 782 logInfo.put(Row.of("k-cldr\t" + keyboardId.getBaseLanguage(), common), keyboardId); 783 // Log.logln(keyboardId + "\tk-cldr\t" + common.toPattern(false)); 784 } 785 } 786 common = new UnicodeSet(exemplars).removeAll(keyboard).remove("ss"); 787 if (common.size() != 0) { 788 results.add("cldr‑k", common.size()); 789 if (keyboardId != null && SKIP_LOG.containsNone(common)) { 790 logInfo.put(Row.of("cldr‑k\t" + keyboardId.getBaseLanguage(), common), keyboardId); 791 // Log.logln(keyboardId + "\tcldr‑k\t" + common.toPattern(false)); 792 } 793 } 794 } 795 796 static final UnicodeSet SKIP_LOG = new UnicodeSet("[가一]").freeze(); 797 static Relation<Row.R2<String, UnicodeSet>, Id> logInfo = Relation.of(new TreeMap<Row.R2<String, UnicodeSet>, Set<Id>>(), TreeSet.class); 798 799 static class Id implements Comparable<Id> { 800 final String locale; 801 final String platform; 802 final String variant; 803 final String platformVersion; 804 Id(String input, String platformVersion)805 Id(String input, String platformVersion) { 806 int pos = input.indexOf("-t-k0-"); 807 String localeTemp = input.substring(0, pos); 808 locale = ULocale.minimizeSubtags(ULocale.forLanguageTag(localeTemp)).toLanguageTag(); 809 pos += 6; 810 int pos2 = input.indexOf('-', pos); 811 if (pos2 > 0) { 812 platform = input.substring(pos, pos2); 813 variant = input.substring(pos2 + 1); 814 } else { 815 platform = input.substring(pos); 816 variant = ""; 817 } 818 this.platformVersion = platformVersion; 819 } 820 821 @Override compareTo(Id other)822 public int compareTo(Id other) { 823 int result; 824 if (0 != (result = locale.compareTo(other.locale))) { 825 return result; 826 } 827 if (0 != (result = platform.compareTo(other.platform))) { 828 return result; 829 } 830 if (0 != (result = variant.compareTo(other.variant))) { 831 return result; 832 } 833 return 0; 834 } 835 836 @Override toString()837 public String toString() { 838 return locale + "/" + platform + "/" + variant; 839 } 840 getBaseLanguage()841 public String getBaseLanguage() { 842 int pos = locale.indexOf('-'); 843 return pos < 0 ? locale : locale.substring(0, pos); 844 } 845 } 846 847 static class IdSet { 848 Map<String, Relation<String, String>> data = new TreeMap<String, Relation<String, String>>(); 849 add(Id id)850 public void add(Id id) { 851 Relation<String, String> platform2variant = data.get(id.platform); 852 if (platform2variant == null) { 853 data.put(id.platform, platform2variant = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class)); 854 } 855 platform2variant.put(id.locale, id.variant); 856 } 857 addAll(Collection<Id> idSet)858 public void addAll(Collection<Id> idSet) { 859 for (Id id : idSet) { 860 add(id); 861 } 862 } 863 toString(IdSet allIds)864 public String toString(IdSet allIds) { 865 if (this.equals(allIds)) { 866 return "*"; 867 } 868 StringBuilder b = new StringBuilder(); 869 final Set<Entry<String, Relation<String, String>>> entrySet = data.entrySet(); 870 boolean first = true; 871 for (Entry<String, Relation<String, String>> entry : entrySet) { 872 if (first) { 873 first = false; 874 } else { 875 b.append(" "); 876 } 877 String key = entry.getKey(); 878 Set<Entry<String, Set<String>>> valueSet = entry.getValue().keyValuesSet(); 879 b.append(key).append(":"); 880 appendLocaleAndVariants(b, valueSet, allIds.data.get(key)); 881 } 882 return b.toString(); 883 } 884 appendLocaleAndVariants(StringBuilder b, Set<Entry<String, Set<String>>> set, Relation<String, String> relation)885 private void appendLocaleAndVariants(StringBuilder b, Set<Entry<String, Set<String>>> set, 886 Relation<String, String> relation) { 887 if (set.equals(relation.keyValuesSet())) { 888 b.append("*"); 889 return; 890 } 891 final int setSize = set.size(); 892 if (setSize > 9) { 893 b.append(setSize).append("/").append(relation.size()); 894 return; 895 } 896 final boolean isSingle = setSize == 1; 897 if (!isSingle) b.append("("); 898 boolean first = true; 899 for (Entry<String, Set<String>> item : set) { 900 if (first) { 901 first = false; 902 } else { 903 b.append("|"); 904 } 905 final String key = item.getKey(); 906 b.append(key); 907 final Set<String> variants = item.getValue(); 908 final int size = variants.size(); 909 if (size != 0) { 910 if (size == 1) { 911 String firstOne = variants.iterator().next(); 912 if (firstOne.isEmpty()) { 913 continue; // fr-CA/∅ => fr-CA 914 } 915 } 916 b.append("/"); 917 appendVariant(b, variants, relation.get(key)); 918 } 919 } 920 if (!isSingle) b.append(")"); 921 } 922 appendVariant(StringBuilder b, Set<String> set, Set<String> set2)923 private void appendVariant(StringBuilder b, Set<String> set, Set<String> set2) { 924 if (set.equals(set2)) { 925 b.append("*"); 926 return; 927 } 928 final boolean isSingle = set.size() == 1; 929 if (!isSingle) b.append("("); 930 boolean first = true; 931 for (String item : set) { 932 if (first) { 933 first = false; 934 } else { 935 b.append("|"); 936 } 937 b.append(item.isEmpty() ? "∅" : item); 938 } 939 if (!isSingle) b.append(")"); 940 } 941 isEquals(Object other)942 public boolean isEquals(Object other) { 943 return data.equals(((IdSet) other).data); 944 } 945 hashCode()946 public int hashCode() { 947 return data.hashCode(); 948 } 949 } 950 951 // public static class Key { 952 // Iso iso; 953 // ModifierSet modifierSet; 954 // } 955 // /** 956 // * Return all possible results. Could be external utility. WARNING: doesn't account for transform='no' or 957 // failure='omit'. 958 // */ 959 // public Map<String,List<Key>> getPossibleSource() { 960 // Map<String,List<Key>> results = new HashMap<String,List<Key>>(); 961 // UnicodeSet results = new UnicodeSet(); 962 // addOutput(getBaseMap().iso2output.values(), results); 963 // for (KeyMap keymap : getKeyMaps()) { 964 // addOutput(keymap.string2output.values(), results); 965 // } 966 // for (Transforms transforms : getTransforms().values()) { 967 // // loop, to catch empty case 968 // for (String result : transforms.string2string.values()) { 969 // if (!result.isEmpty()) { 970 // results.add(result); 971 // } 972 // } 973 // } 974 // return results; 975 // } 976 getQuotationMarks(CLDRFile file)977 static UnicodeSet getQuotationMarks(CLDRFile file) { 978 UnicodeSet results = new UnicodeSet(); 979 // TODO should have a test to make sure these are in exemplars. 980 results.add(file.getStringValue("//ldml/delimiters/quotationEnd")); 981 results.add(file.getStringValue("//ldml/delimiters/quotationStart")); 982 results.add(file.getStringValue("//ldml/delimiters/alternateQuotationEnd")); 983 results.add(file.getStringValue("//ldml/delimiters/alternateQuotationStart")); 984 return results; 985 } 986 987 // TODO Add as utility to CLDRFile getNumericExemplars(CLDRFile file)988 static UnicodeSet getNumericExemplars(CLDRFile file) { 989 UnicodeSet results = new UnicodeSet(); 990 String defaultNumberingSystem = file.getStringValue("//ldml/numbers/defaultNumberingSystem"); 991 String nativeNumberingSystem = file.getStringValue("//ldml/numbers/otherNumberingSystems/native"); 992 // "//ldml/numbers/otherNumberingSystems/native" 993 addNumberingSystem(file, results, "latn"); 994 if (!defaultNumberingSystem.equals("latn")) { 995 addNumberingSystem(file, results, defaultNumberingSystem); 996 } 997 if (!nativeNumberingSystem.equals("latn") && !nativeNumberingSystem.equals(defaultNumberingSystem)) { 998 addNumberingSystem(file, results, nativeNumberingSystem); 999 } 1000 return results; 1001 } 1002 addNumberingSystem(CLDRFile file, UnicodeSet results, String numberingSystem)1003 public static void addNumberingSystem(CLDRFile file, UnicodeSet results, String numberingSystem) { 1004 String digits = supplementalDataInfo.getDigits(numberingSystem); 1005 results.addAll(digits); 1006 addSymbol(file, numberingSystem, "decimal", results); 1007 addSymbol(file, numberingSystem, "group", results); 1008 addSymbol(file, numberingSystem, "minusSign", results); 1009 addSymbol(file, numberingSystem, "percentSign", results); 1010 addSymbol(file, numberingSystem, "plusSign", results); 1011 } 1012 addSymbol(CLDRFile file, String numberingSystem, String key, UnicodeSet results)1013 public static void addSymbol(CLDRFile file, String numberingSystem, String key, UnicodeSet results) { 1014 String symbol = file.getStringValue("//ldml/numbers/symbols[@numberSystem=\"" + numberingSystem + "\"]/" + 1015 key); 1016 results.add(symbol); 1017 } 1018 } 1019