package org.unicode.cldr.tool;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.ToolConstants.ChartStatus;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.AttributeStatus;
import org.unicode.cldr.util.DtdData.Element;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.SupplementalDataInfo;

import com.google.common.base.Joiner;
import com.google.common.base.MoreObjects;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.util.VersionInfo;

/**
 * Changed ShowDtdDiffs into a chart.
 *
 * <p>For each version in the configured version set and each DTD type in {@link #TYPES}, the
 * current DTD is compared against the DTD of the previously visited version. Every detected
 * difference becomes one {@link DiffElement}, which is rendered as one table row and also
 * written to {@code dtd_deltas.tsv}.
 *
 * @author markdavis
 */
public class ChartDtdDelta extends Chart {

    private static final Splitter SPLITTER_SPACE = Splitter.on(' ');

    private static final String NEW_PREFIX = "+";

    private static final String DEPRECATED_PREFIX = "⊖";
    private static final String UNDEPRECATED_PREFIX = "⊙"; // no occurrences yet

    private static final String ORDERED_SIGN = "⇣";
    private static final String UNORDERED_SIGN = "⇟";

    // U+1F15F NEGATIVE CIRCLED LATIN CAPITAL LETTER P, spelled as a surrogate-pair escape so that
    // tooling that mishandles supplementary-plane characters cannot silently blank it out.
    // An empty sign makes "newly tech-preview" indistinguishable from "no change" in checkNames
    // and leaves a hole in the legend produced by getExplanation().
    // NOTE(review): code point chosen to pair with UNTECHPREVIEW_SIGN — confirm against upstream.
    private static final String TECHPREVIEW_SIGN = "\uD83C\uDD5F";
    private static final String UNTECHPREVIEW_SIGN = "ⓟ";

    /** Placeholder attribute list used when an already-reported element shows up under a new parent. */
    private static final Set<String> OMITTED_ATTRIBUTES = Collections.singleton("⊕");

    /**
     * Command-line entry point: writes the chart.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new ChartDtdDelta().writeChart(null);
    }

    @Override
    public String getDirectory() {
        return FormattedFileWriter.CHART_TARGET_DIR;
    }

    @Override
    public String getTitle() {
        return "DTD Deltas";
    }

    /**
     * Returns the HTML legend describing the markers used in the chart.
     */
    @Override
    public String getExplanation() {
        return "<p>Changes to the LDML DTDs over time.</p>\n"
            + "<ul>\n"
            + "<li>New elements or attributes are indicated with a + sign, and newly deprecated ones with a ⊖ sign.</li>\n"
            + "<li>Element attributes are abbreviated as ⊕ where is no change to them, "
            + "but the element is newly the child of another.</li>\n"
            + "<li>LDML DTDs have augmented data:\n"
            + "<ul><li>Attribute status is marked by: "
            + AttributeStatus.distinguished.shortName + "=" + AttributeStatus.distinguished + ", "
            + AttributeStatus.value.shortName + "=" + AttributeStatus.value + ", or "
            + AttributeStatus.metadata.shortName + "=" + AttributeStatus.metadata + ".</li>\n"
            + "<li>Attribute value constraints are marked with ⟨…⟩ (for DTD constraints) and ⟪…⟫ (for augmented constraints, added in v35.0).</li>\n"
            + "<li>Changes in status or constraints are shown with ➠, with identical sections shown with ….</li>\n"
            + "<li>Newly ordered elements are indicated with " + ORDERED_SIGN + "; newly unordered with " + UNORDERED_SIGN + ".</li>\n"
            + "<li>Newly tech-preview items are marked with " + TECHPREVIEW_SIGN + "; newly graduated from tech preview with " + UNTECHPREVIEW_SIGN + ".</li>\n"
            + "<li>The following elements are skipped: " + SKIP_ELEMENTS + " and " + SKIP_TYPE_ELEMENTS + "</li>\n"
            + "<li>The following attributes are skipped: " + SKIP_ATTRIBUTES + " and " + SKIP_ATTRIBUTE_MATCHES + "</li>\n"
            + "</ul></li></ul>\n"
            + "<p>For more information, see the LDML spec.</p>";
    }

    /**
     * Computes all DTD differences, then writes them as an HTML table to {@code pw} and as TSV to
     * {@code dtd_deltas.tsv} under the chart directory.
     *
     * @param pw destination for the HTML table
     * @throws IOException if writing fails
     */
    @Override
    public void writeContents(FormattedFileWriter pw) throws IOException {
        TablePrinter tablePrinter = new TablePrinter()
            .addColumn("Version", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
            .setSortPriority(0)
            .setSortAscending(false)
            .setBreakSpans(true)
            .addColumn("Dtd Type", "class='source'", null, "class='source'", true)
            .setSortPriority(1)

            .addColumn("Intermediate Path", "class='source'", null, "class='target'", true)
            .setSortPriority(2)

            .addColumn("Element", "class='target'", null, "class='target'", true)
            .setSpanRows(false)
            .addColumn("Attributes", "class='target'", null, "class='target'", true)
            .setSpanRows(false);

        // Version visited in the previous iteration; null on the first pass.
        String last = null;

        for (String current : ToolConstants.CHART_STATUS == ChartStatus.beta
            ? ToolConstants.CLDR_RELEASE_AND_DEV_VERSION_SET
            : ToolConstants.CLDR_RELEASE_VERSION_SET) {
            System.out.println("DTD delta: " + current);
            final boolean isDevVersion = current.equals(ToolConstants.DEV_VERSION);
            final String currentName = isDevVersion ? ToolConstants.CHART_DISPLAY_VERSION : current;

            for (DtdType type : TYPES) {
                final String firstVersion = type.firstVersion;
                // NOTE(review): version strings are compared lexicographically, not numerically —
                // confirm this ordering holds for every version in the set.
                if (firstVersion != null && current.compareTo(firstVersion) < 0) {
                    // skip if current is too old to have “type”
                    continue;
                }

                final DtdData dtdCurrent;
                try {
                    // The dev version is requested with a null version string.
                    dtdCurrent = DtdData.getInstance(type, isDevVersion ? null : current);
                } catch (Exception e) {
                    // Only a missing DTD file is tolerated; anything else is a real failure.
                    if (!(e.getCause() instanceof FileNotFoundException)) {
                        throw e;
                    }
                    System.out.println(e.getMessage() + ", " + e.getCause().getMessage());
                    continue;
                }

                DtdData dtdLast = null;
                if (last != null && (firstVersion == null || last.compareTo(firstVersion) >= 0)) {
                    // only read if last isn’t too old to have “type”
                    dtdLast = DtdData.getInstance(type, last);
                }
                diff(currentName, dtdLast, dtdCurrent);
            }

            last = current;
            if (current.contentEquals(ToolConstants.CHART_VERSION)) {
                break;
            }
        }

        for (DiffElement datum : data) {
            tablePrinter.addRow()
                .addCell(datum.getVersionString())
                .addCell(datum.dtdType)
                .addCell(datum.newPath)
                .addCell(datum.newElement)
                .addCell(datum.attributeNames)
                .finishRow();
        }
        pw.write(tablePrinter.toTable());
        pw.write(Utility.repeat("<br>", 50));
        try (PrintWriter tsvFile = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "/tsv/", "dtd_deltas.tsv")) {
            tablePrinter.toTsv(tsvFile);
        }
    }

    /** Cell content used when an element has no attribute changes to list. */
    static final String NONE = " ";

    static final SupplementalDataInfo SDI = CLDRConfig.getInstance().getSupplementalDataInfo();

    /** DTD types that are charted: every {@link DtdType} except {@code ldmlICU}. */
    static Set<DtdType> TYPES = EnumSet.allOf(DtdType.class);
    static {
        TYPES.remove(DtdType.ldmlICU);
    }

    // Not read anywhere in this file; writeContents uses DtdType.firstVersion instead.
    static final Map<DtdType, String> FIRST_VERSION = new EnumMap<>(DtdType.class);
    static {
        FIRST_VERSION.put(DtdType.ldmlBCP47, "1.7.2");
        FIRST_VERSION.put(DtdType.keyboard, "22.1");
        FIRST_VERSION.put(DtdType.platform, "22.1");
    }

    /**
     * Records every difference between two versions of one DTD, starting at the root element.
     *
     * @param version display name of the current version
     * @param dtdLast DTD of the previous version, or null if there is none to compare against
     * @param dtdCurrent DTD of the current version
     */
    private void diff(String version, DtdData dtdLast, DtdData dtdCurrent) {
        Map<String, Element> oldNameToElement = dtdLast == null ? Collections.emptyMap() : dtdLast.getElementFromName();
        checkNames(version, dtdCurrent, dtdLast, oldNameToElement, "/", dtdCurrent.ROOT, new HashSet<Element>(), false);
    }

    static final DtdType DEBUG_DTD = null; // set to enable
    static final String DEBUG_ELEMENT = "lias";
    static final boolean SHOW = false;

    /**
     * Recursively compares {@code element} and its descendants against the previous DTD and
     * records a row for each element that is new, changed status (deprecated, ordered,
     * tech-preview) or has attribute changes.
     *
     * @param version display name of the current version
     * @param dtdCurrent DTD being walked
     * @param dtdLast previous DTD, or null
     * @param oldNameToElement elements of the previous DTD keyed by name (empty if none)
     * @param path path of the parent element
     * @param element element to examine
     * @param seen elements already examined in this walk; updated by this method
     * @param showAnyway whether an already-seen element should still be reported, with the
     *     abbreviated attribute marker, because it is newly a child of its parent
     */
    @SuppressWarnings("unused")
    private void checkNames(String version, DtdData dtdCurrent, DtdData dtdLast, Map<String, Element> oldNameToElement, String path, Element element,
        HashSet<Element> seen, boolean showAnyway) {
        String name = element.getName();

        if (SKIP_ELEMENTS.contains(name)) {
            return;
        }
        if (SKIP_TYPE_ELEMENTS.containsEntry(dtdCurrent.dtdType, name)) {
            return;
        }

        String newPath = path + "/" + element.name;

        // if an element is newly a child of another but has already been seen, you'll have special indication
        if (seen.contains(element)) {
            if (showAnyway) {
                addData(dtdCurrent, NEW_PREFIX + name, version, newPath, OMITTED_ATTRIBUTES);
            }
            return;
        }

        seen.add(element);
        if (SHOW && ToolConstants.CHART_DISPLAY_VERSION.equals(version)) {
            System.out.println(dtdCurrent.dtdType + "\t" + name);
        }
        if (DEBUG_DTD == dtdCurrent.dtdType && name.contains(DEBUG_ELEMENT)) {
            int debug = 0; // breakpoint anchor, enabled through DEBUG_DTD / DEBUG_ELEMENT
        }

        Element oldElement = null;
        boolean ordered = element.isOrdered();
        boolean currentTechPreview = element.isTechPreview();

        if (!oldNameToElement.containsKey(name)) {
            // Brand-new element: list all of its (non-skipped) attributes as new.
            Set<String> attributeNames = getAttributeNames(dtdCurrent, dtdLast, name, Collections.emptyMap(), element.getAttributes());
            final String prefix = NEW_PREFIX + (currentTechPreview ? TECHPREVIEW_SIGN : "");
            addData(dtdCurrent, prefix + name + (ordered ? ORDERED_SIGN : ""), version, newPath, attributeNames);
        } else {
            oldElement = oldNameToElement.get(name);
            boolean oldOrdered = oldElement.isOrdered();
            Set<String> attributeNames = getAttributeNames(dtdCurrent, dtdLast, name, oldElement.getAttributes(), element.getAttributes());
            boolean currentDeprecated = element.isDeprecated();
            boolean lastDeprecated = dtdLast != null && oldElement.isDeprecated();
            boolean lastTechPreview = dtdLast != null && oldElement.isTechPreview();

            String deprecatedStatus = currentDeprecated == lastDeprecated ? ""
                : currentDeprecated ? DEPRECATED_PREFIX : UNDEPRECATED_PREFIX;
            // Ordering and tech-preview changes are not reported for deprecated elements.
            String orderingStatus = (ordered == oldOrdered || currentDeprecated) ? ""
                : ordered ? ORDERED_SIGN : UNORDERED_SIGN;
            String previewStatus = (currentTechPreview == lastTechPreview || currentDeprecated) ? ""
                : currentTechPreview ? TECHPREVIEW_SIGN : UNTECHPREVIEW_SIGN;

            boolean somethingChanged = !orderingStatus.isEmpty()
                || !previewStatus.isEmpty()
                || !deprecatedStatus.isEmpty()
                || !attributeNames.isEmpty();
            if (somethingChanged) {
                addData(dtdCurrent, deprecatedStatus + previewStatus + name + orderingStatus, version, newPath, attributeNames);
            }
        }

        // Diagnostic trace; only emitted when SHOW is switched on.
        if (SHOW && element.getName().equals("coordinateUnit")) {
            System.out.println(version + "\tcoordinateUnit\t" + element.getChildren().keySet());
        }

        // Collect the names of the old element's children once, instead of rescanning per child.
        Set<String> oldChildNames = new HashSet<>();
        if (oldElement != null) {
            for (Element oldChild : oldElement.getChildren().keySet()) {
                oldChildNames.add(oldChild.getName());
            }
        }
        for (Element child : element.getChildren().keySet()) {
            // A child is "newly a child" when the old element had no child of the same name.
            boolean newlyChild = !oldChildNames.contains(child.getName());
            checkNames(version, dtdCurrent, dtdLast, oldNameToElement, newPath, child, seen, newlyChild);
        }
    }

    enum DiffType {
        Element, Attribute, AttributeValue
    }

    /** One row of the chart: a single element-level difference in one DTD type and version. */
    private static class DiffElement {

        private static final String START_ATTR = "<div>";
        private static final String END_ATTR = "</div>";
        final VersionInfo version;
        final DtdType dtdType;
        final boolean isBeta;
        final String newPath;
        final String newElement;
        final String attributeNames;

        /**
         * @param dtdCurrent DTD the difference was found in; only its type is retained
         * @param version version string, optionally suffixed with "β"
         * @param newPath full path of the element, e.g. {@code //ldml/a/b}
         * @param newElement decorated element name to display
         * @param attributeNames2 decorated attribute descriptions; may be empty
         * @throws RuntimeException whatever {@link VersionInfo#getInstance(String)} throws for an
         *     unparseable version string
         */
        public DiffElement(DtdData dtdCurrent, String version, String newPath, String newElement, Set<String> attributeNames2) {
            isBeta = version.endsWith("β");
            // Strip the beta marker before parsing; it is re-appended by getVersionString().
            this.version = VersionInfo.getInstance(isBeta ? version.substring(0, version.length() - 1) : version);
            dtdType = dtdCurrent.dtdType;
            this.newPath = fix(newPath);
            this.attributeNames = attributeNames2.isEmpty() ? NONE
                : START_ATTR + Joiner.on(END_ATTR + START_ATTR).join(attributeNames2) + END_ATTR;
            this.newElement = newElement;
        }

        /**
         * Reduces a full path to its intermediate part: the leading root segment and the final
         * element are dropped, and a zero-width space is inserted before each remaining slash.
         * Returns "" for a root-only path and "/" for a direct child of the root.
         */
        private static String fix(String substring) {
            int base = substring.indexOf('/', 2);
            if (base < 0) {
                return "";
            }
            int last = substring.lastIndexOf('/');
            if (last <= base) {
                return "/";
            }
            substring = substring.substring(base, last);
            return substring.replace("/", "\u200B/") + "/";
        }

        @Override
        public String toString() {
            return MoreObjects.toStringHelper(this)
                .add("version", getVersionString())
                .add("dtdType", dtdType)
                .add("newPath", newPath)
                .add("newElement", newElement)
                .add("attributeNames", attributeNames)
                .toString();
        }

        /** Returns the version as formatted by {@code getVersionString(2, 4)}, plus "β" if beta. */
        private String getVersionString() {
            return version.getVersionString(2, 4) + (isBeta ? "β" : "");
        }
    }

    /** Accumulated differences, in discovery order. */
    List<DiffElement> data = new ArrayList<>();

    /**
     * Appends one difference row to {@link #data}.
     *
     * @param dtdCurrent DTD the difference was found in
     * @param element decorated element name
     * @param version version string for the row
     * @param newPath full path of the element
     * @param attributeNames decorated attribute descriptions
     */
    private void addData(DtdData dtdCurrent, String element, String version, String newPath, Set<String> attributeNames) {
        DiffElement item = new DiffElement(dtdCurrent, version, newPath, element, attributeNames);
        data.add(item);
    }

    static final Set<String> SKIP_ELEMENTS = ImmutableSet.of("generation", "identity", "special"); // , "telephoneCodeData"

    static final Multimap<DtdType, String> SKIP_TYPE_ELEMENTS = ImmutableMultimap.of(DtdType.ldml, "alias");

    static final Set<String> SKIP_ATTRIBUTES = ImmutableSet.of(
        "references",
        "standard",
        "draft"
        );

    static final Multimap<String, String> SKIP_ATTRIBUTE_MATCHES = ImmutableMultimap.of(
        "alt", "", "alt", "⟪literal/variant⟫");

    /**
     * Builds the display strings for attributes that are new, newly deprecated, or whose status
     * or match constraint changed between the old and new element.
     *
     * @param dtdCurrent unused
     * @param dtdLast unused
     * @param elementName unused
     * @param attributesOld attributes of the element in the previous DTD (empty if the element is new)
     * @param attributes attributes of the element in the current DTD
     * @return display strings in the iteration order of {@code attributes}; empty if nothing changed
     */
    private static Set<String> getAttributeNames(DtdData dtdCurrent, DtdData dtdLast, String elementName,
        Map<Attribute, Integer> attributesOld,
        Map<Attribute, Integer> attributes) {
        Set<String> names = new LinkedHashSet<>();

        // we want to add a name that is new or that becomes deprecated
        for (Attribute attribute : attributes.keySet()) {
            String name = attribute.getName();
            if (SKIP_ATTRIBUTES.contains(name)) {
                continue;
            }
            String match = attribute.getMatchString();
            AttributeStatus status = attribute.attributeStatus;
            String display;

            Attribute attributeOld = attribute.getMatchingName(attributesOld);
            if (attributeOld == null) {
                // New attribute.
                if (SKIP_ATTRIBUTE_MATCHES.containsEntry(name, match)) {
                    continue;
                }
                display = NEW_PREFIX + name + " " + AttributeStatus.getShortName(status) + " " + match;
            } else if (attribute.isDeprecated() && !attributeOld.isDeprecated()) {
                display = DEPRECATED_PREFIX + name;
            } else {
                String oldMatch = attributeOld.getMatchString();
                AttributeStatus oldStatus = attributeOld.attributeStatus;
                boolean matchEquals = match.equals(oldMatch);

                String pre;
                String post;
                if (status != oldStatus) {
                    pre = AttributeStatus.getShortName(oldStatus);
                    post = AttributeStatus.getShortName(status);
                    if (!matchEquals) {
                        pre += " " + oldMatch;
                        post += " " + match;
                    }
                } else if (!matchEquals) {
                    // A constraint listed in SKIP_ATTRIBUTE_MATCHES that appears where there was
                    // none before is not reported.
                    if (oldMatch.isEmpty()
                        && SKIP_ATTRIBUTE_MATCHES.containsEntry(name, match)) {
                        continue;
                    }
                    pre = oldMatch;
                    post = match;
                } else {
                    continue; // skip attribute entirely
                }
                display = name + " " + diff(pre, post);
            }
            names.add(display);
        }
        return names;
    }

    /**
     * Formats a change from {@code pre} to {@code post} as {@code pre➠post}. When both strings
     * match {@code Attribute.LEAD_TRAIL}, the space-separated tokens of group 2 that occur in
     * both are collapsed to "…" so that only the differing tokens are spelled out.
     *
     * @param pre old value
     * @param post new value
     * @return the combined display string
     */
    public static String diff(String pre, String post) {
        Matcher matcherPre = Attribute.LEAD_TRAIL.matcher(pre);
        Matcher matcherPost = Attribute.LEAD_TRAIL.matcher(post);
        if (matcherPre.matches() && matcherPost.matches()) {
            List<String> preParts = SPLITTER_SPACE.splitToList(matcherPre.group(2));
            List<String> postParts = SPLITTER_SPACE.splitToList(matcherPost.group(2));
            pre = matcherPre.group(1) + remove(preParts, postParts) + matcherPre.group(3);
            post = matcherPost.group(1) + remove(postParts, preParts) + matcherPost.group(3);
        }
        return pre + "➠" + post;
    }

    /**
     * Joins the items of {@code main} with spaces, replacing each maximal run of items that also
     * occur in {@code toRemove} with a single "…".
     */
    private static String remove(List<String> main, List<String> toRemove) {
        // Hash-based lookup keeps the membership test constant-time per item.
        Set<String> shared = new HashSet<>(toRemove);
        List<String> result = new ArrayList<>();
        boolean removed = false;
        for (String s : main) {
            if (shared.contains(s)) {
                removed = true;
            } else {
                if (removed) {
                    result.add("…");
                    removed = false;
                }
                result.add(s);
            }
        }
        if (removed) {
            result.add("…");
        }
        return Joiner.on(" ").join(result);
    }
}