1 package org.unicode.cldr.test; 2 3 import com.google.common.cache.CacheBuilder; 4 import com.google.common.cache.CacheLoader; 5 import com.google.common.cache.LoadingCache; 6 import com.ibm.icu.util.ICUException; 7 import com.ibm.icu.util.Output; 8 import java.util.LinkedHashSet; 9 import java.util.List; 10 import java.util.Map.Entry; 11 import java.util.Set; 12 import java.util.concurrent.ExecutionException; 13 import java.util.regex.Matcher; 14 import java.util.regex.Pattern; 15 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 16 import org.unicode.cldr.util.ApproximateWidth; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.Level; 19 import org.unicode.cldr.util.PatternCache; 20 import org.unicode.cldr.util.Rational; 21 import org.unicode.cldr.util.RegexLookup; 22 import org.unicode.cldr.util.StandardCodes.LstrType; 23 import org.unicode.cldr.util.SupplementalDataInfo; 24 import org.unicode.cldr.util.UnitConverter; 25 import org.unicode.cldr.util.UnitConverter.UnitId; 26 import org.unicode.cldr.util.Validity; 27 28 public class CheckWidths extends CheckCLDR { 29 // remember to add this class to the list in CheckCLDR.getCheckAll 30 // to run just this test, on just locales starting with 'nl', use CheckCLDR with -fnl.* 31 // -t.*CheckWidths.* 32 private static CoverageLevel2 coverageLevel; 33 private Level requiredLevel; 34 35 private static UnitWidthUtil UNIT_WIDTHS_UTIL = UnitWidthUtil.getInstance(); 36 37 /** Controls for the warning about too many components, and for when to cause error. */ 38 public static final int WARN_COMPONENTS_PER_ANNOTATION = 12; 39 40 // TBD lower this down after Meta data added 41 public static final int MAX_COMPONENTS_PER_ANNOTATION = 20; 42 43 SupplementalDataInfo supplementalData; 44 45 private static final double EM = ApproximateWidth.getWidth("月"); 46 47 private static final boolean DEBUG = true; 48 49 private enum Measure { 50 CODE_POINTS, 51 DISPLAY_WIDTH, 52 SET_ELEMENTS 53 } 54 55 private enum LimitType { 56 MINIMUM, 57 MAXIMUM 58 } 59 60 private enum Special { 61 NONE, 62 QUOTES, 63 PLACEHOLDERS, 64 NUMBERSYMBOLS, 65 NUMBERFORMAT, 66 BARS, 67 PLACEHOLDER_UNITS 68 } 69 70 private static final Pattern PLACEHOLDER_PATTERN = PatternCache.get("\\{\\d\\}"); 71 72 private static class Limit { 73 final double warningReference; 74 final double errorReference; 75 final LimitType limit; 76 final Measure measure; 77 final Special special; 78 final String message; 79 final Subtype subtype; 80 final boolean debug; 81 Limit( double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug)82 public Limit( 83 double warningReference, 84 double errorReference, 85 Measure measure, 86 LimitType limit, 87 Special special, 88 boolean debug) { 89 this.debug = debug; 90 this.warningReference = warningReference; 91 this.errorReference = errorReference; 92 this.limit = limit; 93 this.measure = measure; 94 this.special = special; 95 switch (limit) { 96 case MINIMUM: 97 this.subtype = Subtype.valueTooNarrow; 98 switch (measure) { 99 case CODE_POINTS: 100 this.message = "Expected no fewer than {0} character(s), but was {1}."; 101 break; 102 case DISPLAY_WIDTH: 103 this.message = "Too narrow by about {2}% (with common fonts)."; 104 break; 105 default: 106 throw new IllegalArgumentException(); 107 } 108 break; 109 case MAXIMUM: 110 switch (measure) { 111 case CODE_POINTS: 112 this.message = "Expected no more than {0} character(s), but was {1}."; 113 this.subtype = Subtype.valueTooWide; 114 break; 115 case DISPLAY_WIDTH: 116 this.message = "Too wide by about {2}% (with common fonts)."; 117 this.subtype = Subtype.valueTooWide; 118 break; 119 case SET_ELEMENTS: 120 this.message = 121 "There cannot be more than {3} item(s), and it is recommended to not have more than {0} item(s). Found {1} item(s)."; 122 this.subtype = Subtype.tooManyValues; 123 break; 124 default: 125 throw new IllegalArgumentException(); 126 } 127 break; 128 default: 129 throw new IllegalArgumentException(); 130 } 131 } 132 Limit( double d, double e, Measure displayWidth, LimitType maximum, Special placeholders)133 public Limit( 134 double d, double e, Measure displayWidth, LimitType maximum, Special placeholders) { 135 this(d, e, displayWidth, maximum, placeholders, false); 136 } 137 hasProblem( String path, String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprehensive)138 boolean hasProblem( 139 String path, 140 String value, 141 List<CheckStatus> result, 142 CheckCLDR cause, 143 Boolean aliasedAndComprehensive) { 144 double factor = 1d; 145 switch (special) { 146 case NUMBERFORMAT: 147 String[] values = value.split(";", 2); 148 // If it's a number format with positive and negative subpatterns, just check 149 // the longer one. 150 value = 151 (values.length == 2 && values[1].length() > values[0].length()) 152 ? values[1] 153 : values[0]; 154 value = value.replace("'", ""); 155 break; 156 case QUOTES: 157 value = value.replace("'", ""); 158 break; 159 case PLACEHOLDER_UNITS: 160 factor = UNIT_WIDTHS_UTIL.getRoughComponentMax(path); 161 // fall through ok 162 case PLACEHOLDERS: 163 value = PLACEHOLDER_PATTERN.matcher(value).replaceAll(""); 164 break; 165 case NUMBERSYMBOLS: 166 value = 167 value.replaceAll( 168 "[\u200E\u200F\u061C]", 169 ""); // don't include LRM/RLM/ALM when checking length of number 170 // symbols 171 break; 172 case BARS: 173 value = 174 value.replaceAll("[^|]", "") 175 + "|"; // Check the number of items by counting separators. Bit 176 // of a hack... 177 break; 178 default: 179 } 180 double valueMeasure = 181 measure == Measure.DISPLAY_WIDTH 182 ? ApproximateWidth.getWidth(value) 183 : value.codePointCount(0, value.length()); 184 CheckStatus.Type errorType = CheckStatus.warningType; 185 switch (limit) { 186 case MINIMUM: 187 if (valueMeasure >= warningReference) { 188 return false; 189 } 190 if (valueMeasure < errorReference 191 && cause.getPhase() != Phase.BUILD 192 && !aliasedAndComprehensive) { 193 errorType = CheckStatus.errorType; 194 } 195 break; 196 case MAXIMUM: 197 if (valueMeasure <= warningReference * factor) { 198 return false; 199 } 200 if (valueMeasure > errorReference * factor 201 && cause.getPhase() != Phase.BUILD 202 && !aliasedAndComprehensive) { 203 // Workaround for ST submission phase only per TC discussion 2018-05-30 204 // Make too many keywords be only a warning until we decide policy (JCE) 205 if (cause.getPhase() == Phase.SUBMISSION 206 && measure.equals(Measure.SET_ELEMENTS)) { 207 errorType = CheckStatus.warningType; 208 } else { 209 errorType = CheckStatus.errorType; 210 } 211 } 212 break; 213 } 214 // the 115 is so that we don't show small percentages 215 // the /10 ...*10 is to round to multiples of 10% percent 216 double percent = 217 (int) (Math.abs(115 * valueMeasure / warningReference - 100.0d) / 10 + 0.49999d) 218 * 10; 219 result.add( 220 new CheckStatus() 221 .setCause(cause) 222 .setMainType(errorType) 223 .setSubtype(subtype) 224 .setMessage( 225 message, 226 warningReference, 227 valueMeasure, 228 percent, 229 errorReference)); 230 return true; 231 } 232 } 233 234 static RegexLookup<Limit[]> lookup = 235 new RegexLookup<Limit[]>() 236 .setPatternTransform(RegexLookup.RegexFinderTransformPath) 237 .addVariable("%A", "\"[^\"]+\"") 238 .addVariable("%P", "\"[ap]m\"") 239 .addVariable("%Q", "[^ap].*|[ap][^m].*") // Anything but am or pm 240 .add( 241 "//ldml/delimiters/(quotation|alternateQuotation)", 242 new Limit[] { 243 new Limit( 244 1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NONE) 245 }) 246 247 // Numeric items should be no more than a single character 248 249 .add( 250 "//ldml/numbers/symbols[@numberSystem=%A]/(decimal|group|minus|percent|perMille|plus)", 251 new Limit[] { 252 new Limit( 253 1, 254 1, 255 Measure.CODE_POINTS, 256 LimitType.MAXIMUM, 257 Special.NUMBERSYMBOLS) 258 }) 259 260 // Now widths 261 // The following are rough measures, just to check strange cases 262 263 .add( 264 "//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]", 265 new Limit[] { 266 new Limit( 267 2 * EM, 268 5 * EM, 269 Measure.DISPLAY_WIDTH, 270 LimitType.MAXIMUM, 271 Special.PLACEHOLDERS) 272 }) 273 .add( 274 "//ldml/localeDisplayNames/localeDisplayPattern/", 275 new Limit[] { // {0}: {1}, {0} ({1}), , 276 new Limit( 277 2 * EM, 278 3 * EM, 279 Measure.DISPLAY_WIDTH, 280 LimitType.MAXIMUM, 281 Special.PLACEHOLDERS) 282 }) 283 .add( 284 "//ldml/listPatterns/listPattern/listPatternPart[@type=%A]", 285 new Limit[] { // {0} and {1} 286 new Limit( 287 5 * EM, 288 10 * EM, 289 Measure.DISPLAY_WIDTH, 290 LimitType.MAXIMUM, 291 Special.PLACEHOLDERS) 292 }) 293 .add( 294 "//ldml/dates/timeZoneNames/fallbackFormat", 295 new Limit[] { // {1} ({0}) 296 new Limit( 297 2 * EM, 298 3 * EM, 299 Measure.DISPLAY_WIDTH, 300 LimitType.MAXIMUM, 301 Special.PLACEHOLDERS) 302 }) 303 .add( 304 "//ldml/dates/timeZoneNames/(regionFormat|hourFormat)", 305 new Limit[] { // {0} Time, 306 // +HH:mm;-HH:mm 307 new Limit( 308 10 * EM, 309 20 * EM, 310 Measure.DISPLAY_WIDTH, 311 LimitType.MAXIMUM, 312 Special.PLACEHOLDERS) 313 }) 314 .add( 315 "//ldml/dates/timeZoneNames/(gmtFormat|gmtZeroFormat)", 316 new Limit[] { // GMT{0}, GMT 317 new Limit( 318 5 * EM, 319 10 * EM, 320 Measure.DISPLAY_WIDTH, 321 LimitType.MAXIMUM, 322 Special.PLACEHOLDERS) 323 }) 324 325 // Era Abbreviations 326 327 // Allow longer for Japanese calendar eras 328 .add( 329 "//ldml/dates/calendars/calendar[@type=\"japanese\"]/.*/eraAbbr/era[@type=%A]", 330 new Limit[] { 331 new Limit( 332 12 * EM, 333 16 * EM, 334 Measure.DISPLAY_WIDTH, 335 LimitType.MAXIMUM, 336 Special.NONE) 337 }) 338 // Allow longer for ROC calendar eras 339 .add( 340 "//ldml/dates/calendars/calendar[@type=\"roc\"]/.*/eraAbbr/era[@type=%A]", 341 new Limit[] { 342 new Limit( 343 4 * EM, 344 8 * EM, 345 Measure.DISPLAY_WIDTH, 346 LimitType.MAXIMUM, 347 Special.NONE) 348 }) 349 .add( 350 "//ldml/dates/calendars/calendar.*/eraAbbr/era[@type=%A]", 351 new Limit[] { 352 new Limit( 353 3 * EM, 354 6 * EM, 355 Measure.DISPLAY_WIDTH, 356 LimitType.MAXIMUM, 357 Special.NONE) 358 }) 359 360 // am/pm abbreviated 361 .add( 362 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%P]", 363 new Limit[] { 364 new Limit( 365 4 * EM, 366 6 * EM, 367 Measure.DISPLAY_WIDTH, 368 LimitType.MAXIMUM, 369 Special.NONE) 370 }) 371 // other day periods abbreviated 372 .add( 373 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%Q]", 374 new Limit[] { 375 new Limit( 376 8 * EM, 377 12 * EM, 378 Measure.DISPLAY_WIDTH, 379 LimitType.MAXIMUM, 380 Special.NONE) 381 }) 382 // am/pm wide 383 .add( 384 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%P]", 385 new Limit[] { 386 new Limit( 387 5 * EM, 388 10 * EM, 389 Measure.DISPLAY_WIDTH, 390 LimitType.MAXIMUM, 391 Special.NONE) 392 }) 393 // other day periods wide 394 .add( 395 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%Q]", 396 new Limit[] { 397 new Limit( 398 10 * EM, 399 20 * EM, 400 Measure.DISPLAY_WIDTH, 401 LimitType.MAXIMUM, 402 Special.NONE) 403 }) 404 405 // Narrow items 406 407 .add( 408 "//ldml/dates/calendars/calendar.*[@type=\"narrow\"](?!/cyclic|/dayPeriod|/monthPattern)", 409 new Limit[] { 410 new Limit( 411 1.5 * EM, 412 2.25 * EM, 413 Measure.DISPLAY_WIDTH, 414 LimitType.MAXIMUM, 415 Special.NONE) 416 }) 417 // \"(?!am|pm)[^\"]+\"\\ 418 419 // Compact number formats 420 // pattern[@type="100000000000000"] 421 .add( 422 "//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"100000000000000", 423 new Limit[] { 424 new Limit( 425 4 * EM, 426 6 * EM, 427 Measure.DISPLAY_WIDTH, 428 LimitType.MAXIMUM, 429 Special.NUMBERFORMAT) 430 }) 431 .add( 432 "//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"1", 433 new Limit[] { 434 new Limit( 435 4 * EM, 436 5 * EM, 437 Measure.DISPLAY_WIDTH, 438 LimitType.MAXIMUM, 439 Special.NUMBERFORMAT) 440 }) 441 442 // Short/Narrow units 443 // Note that the EM values are adjusted for units according to the number of 444 // components in the units 445 // See UnitWidthUtil for more information 446 .add( 447 "//ldml/units/unitLength[@type=\"(short|narrow)\"]/unit[@type=%A]/unitPattern", 448 new Limit[] { 449 new Limit( 450 3 * EM, 451 5 * EM, 452 Measure.DISPLAY_WIDTH, 453 LimitType.MAXIMUM, 454 Special.PLACEHOLDER_UNITS) 455 }) 456 457 // Currency Symbols 458 .add( 459 "//ldml/numbers/currencies/currency[@type=%A]/symbol", 460 new Limit[] { 461 new Limit( 462 3 * EM, 463 5 * EM, 464 Measure.DISPLAY_WIDTH, 465 LimitType.MAXIMUM, 466 Special.PLACEHOLDERS) 467 }) 468 469 // "grinning cat face with smiling eyes" should be normal max ~= 160 em 470 // emoji names (not keywords) 471 .add( 472 "//ldml/annotations/annotation[@cp=%A][@type=%A]", 473 new Limit[] { 474 new Limit( 475 20 * EM, 476 100 * EM, 477 Measure.DISPLAY_WIDTH, 478 LimitType.MAXIMUM, 479 Special.NONE), 480 }) 481 .add( 482 "//ldml/annotations/annotation[@cp=%A]", 483 new Limit[] { 484 new Limit( 485 WARN_COMPONENTS_PER_ANNOTATION, 486 MAX_COMPONENTS_PER_ANNOTATION, 487 Measure.SET_ELEMENTS, 488 LimitType.MAXIMUM, 489 Special.BARS) // Allow up to 5 with no warning, up to 7 490 // with no error. 491 }); 492 493 // Quell noisy printout 494 // static { 495 // System.out.println("EMs: " + ApproximateWidth.getWidth("grinning cat face with smiling 496 // eyes")); 497 // } 498 499 Set<Limit> found = new LinkedHashSet<>(); 500 501 @Override handleCheck( String path, String fullPath, String value, Options options, List<CheckStatus> result)502 public CheckCLDR handleCheck( 503 String path, String fullPath, String value, Options options, List<CheckStatus> result) { 504 if (value == null) { 505 return this; // skip 506 } 507 if (!accept(result)) return this; 508 // String testPrefix = "//ldml/units/unitLength[@type=\"narrow\"]"; 509 // if (path.startsWith(testPrefix)) { 510 // int i = 0; 511 // } 512 // Limits item0 = 513 // lookup.get("//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000000000\"][@count=\"other\"]"); 514 // item0.check("123456789", result, this); 515 516 Limit[] items = lookup.get(path); 517 if (items != null) { 518 CLDRFile.Status status = new CLDRFile.Status(); 519 this.getCldrFileToCheck().getSourceLocaleID(path, status); 520 // This was put in specifically to deal with the fact that we added a bunch of new units 521 // in CLDR 26 522 // and didn't put the narrow forms of them into modern coverage. If/when the narrow 523 // forms of all units 524 // are modern coverage, then we can safely remove the aliasedAndComprehensive check. 525 // Right now if an 526 // item is aliased and coverage is comprehensive, then it can't generate anything worse 527 // than a warning. 528 Boolean aliasedAndComprehensive = 529 (coverageLevel.getLevel(path).compareTo(Level.COMPREHENSIVE) == 0) 530 && (status.pathWhereFound.compareTo(path) != 0); 531 for (Limit item : items) { 532 if (item.hasProblem(path, value, result, this, aliasedAndComprehensive)) { 533 if (DEBUG && !found.contains(item)) { 534 found.add(item); 535 } 536 break; // only one error per item 537 } 538 } 539 } 540 return this; 541 } 542 543 @Override handleSetCldrFileToCheck( CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)544 public CheckCLDR handleSetCldrFileToCheck( 545 CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors) { 546 final String localeID = cldrFileToCheck.getLocaleID(); 547 supplementalData = 548 SupplementalDataInfo.getInstance(cldrFileToCheck.getSupplementalDirectory()); 549 coverageLevel = CoverageLevel2.getInstance(supplementalData, localeID); 550 551 super.handleSetCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 552 return this; 553 } 554 555 /** 556 * Provide a rough measure of how many unit components there are for the purpose of establishing 557 * a maximum width, with an special factor for non-metric. 558 */ 559 public static class UnitWidthUtil { 560 static final Pattern UNIT_PREFIX = 561 Pattern.compile( 562 "//ldml/units/unitLength\\[@type=\"([^\"]*)\"]/unit\\[@type=\"([^\\\"]*)\"]"); 563 final UnitConverter CONVERTER = SupplementalDataInfo.getInstance().getUnitConverter(); 564 final Set<String> validLongUnitIDs = 565 Validity.getInstance().getCodeToStatus(LstrType.unit).keySet(); 566 567 LoadingCache<String, Double> pathToUnitComponents = 568 CacheBuilder.newBuilder() 569 .build( 570 new CacheLoader<String, Double>() { 571 @Override 572 public Double load(String path) throws ExecutionException { 573 final Matcher matcher = UNIT_PREFIX.matcher(path); 574 if (matcher.lookingAt()) { 575 // String length = matcher.group(1); 576 String longUnitId = matcher.group(2); 577 return unitToComponents.get(longUnitId); 578 } else { 579 throw new ICUException("Internal error"); 580 } 581 } 582 }); 583 584 LoadingCache<String, Double> unitToComponents = 585 CacheBuilder.newBuilder() 586 .build( 587 new CacheLoader<String, Double>() { 588 @Override 589 public Double load(String longUnitId) { 590 double components = 0; 591 String shortId = CONVERTER.getShortId(longUnitId); 592 593 Set<String> systems = CONVERTER.getSystems(shortId); 594 int widthFactor = 595 systems.contains("metric") 596 && !shortId.endsWith("-metric") 597 ? 1 598 : 3; 599 // NOTE: allow cup-metric and pint-metric to be longer, 600 // since they aren't standard metric 601 602 // walk thorough the numerator and denominator to get the 603 // values 604 UnitId unitId = CONVERTER.createUnitId(shortId); 605 for (Entry<String, Integer> entry : 606 unitId.numUnitsToPowers.entrySet()) { 607 components += 608 getComponentCount( 609 entry.getKey(), entry.getValue()); 610 } 611 for (Entry<String, Integer> entry : 612 unitId.denUnitsToPowers.entrySet()) { 613 components += 614 getComponentCount( 615 entry.getKey(), entry.getValue()); 616 } 617 return widthFactor * components; 618 } 619 620 public double getComponentCount(String unit, Integer power) { 621 int result = 1; 622 if (power > 1) { 623 ++result; // add one component for a power 624 } 625 // hack for number 626 if (unit.startsWith("100-")) { 627 ++result; 628 unit = unit.substring(4); 629 } 630 Output<Rational> deprefix = new Output<>(); 631 unit = UnitConverter.stripPrefix(unit, deprefix); 632 if (!deprefix.value.equals(Rational.ONE)) { 633 ++result; // add 1 component for kilo, mega, etc. 634 } 635 for (int i = 0; i < unit.length(); ++i) { 636 if (unit.charAt(i) == '-') { 637 ++result; // add one component for -imperial, etc. 638 } 639 } 640 return result; 641 } 642 }); 643 UnitWidthUtil()644 private UnitWidthUtil() {} 645 getInstance()646 public static UnitWidthUtil getInstance() { 647 return new UnitWidthUtil(); 648 } 649 getRoughComponentMax(String path)650 public double getRoughComponentMax(String path) { 651 try { 652 return pathToUnitComponents.get(path); 653 } catch (ExecutionException e) { 654 throw new ICUException(e); 655 } 656 } 657 } 658 } 659