1 package org.unicode.cldr.test; 2 3 import java.util.LinkedHashSet; 4 import java.util.List; 5 import java.util.Set; 6 import java.util.regex.Pattern; 7 8 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 9 import org.unicode.cldr.util.ApproximateWidth; 10 import org.unicode.cldr.util.CLDRFile; 11 import org.unicode.cldr.util.Level; 12 import org.unicode.cldr.util.PatternCache; 13 import org.unicode.cldr.util.RegexLookup; 14 import org.unicode.cldr.util.SupplementalDataInfo; 15 16 public class CheckWidths extends CheckCLDR { 17 // remember to add this class to the list in CheckCLDR.getCheckAll 18 // to run just this test, on just locales starting with 'nl', use CheckCLDR with -fnl.* -t.*CheckWidths.* 19 private static CoverageLevel2 coverageLevel; 20 private Level requiredLevel; 21 22 /** 23 * Controls for the warning about too many components, and for when to cause error. 24 */ 25 public static final int WARN_COMPONENTS_PER_ANNOTATION = 7; 26 public static final int MAX_COMPONENTS_PER_ANNOTATION = 16; 27 28 SupplementalDataInfo supplementalData; 29 30 private static final double EM = ApproximateWidth.getWidth("月"); 31 32 private static final boolean DEBUG = true; 33 34 private enum Measure { 35 CODE_POINTS, DISPLAY_WIDTH, SET_ELEMENTS 36 } 37 38 private enum LimitType { 39 MINIMUM, MAXIMUM 40 } 41 42 private enum Special { 43 NONE, QUOTES, PLACEHOLDERS, NUMBERSYMBOLS, NUMBERFORMAT, BARS 44 } 45 46 private static final Pattern PLACEHOLDER_PATTERN = PatternCache.get("\\{\\d\\}"); 47 48 private static class Limit { 49 final double warningReference; 50 final double errorReference; 51 final LimitType limit; 52 final Measure measure; 53 final Special special; 54 final String message; 55 final Subtype subtype; 56 final boolean debug; 57 Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug)58 public Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug) { 59 this.debug = debug; 60 this.warningReference = warningReference; 61 this.errorReference = errorReference; 62 this.limit = limit; 63 this.measure = measure; 64 this.special = special; 65 switch (limit) { 66 case MINIMUM: 67 this.subtype = Subtype.valueTooNarrow; 68 switch (measure) { 69 case CODE_POINTS: 70 this.message = "Expected no fewer than {0} character(s), but was {1}."; 71 break; 72 case DISPLAY_WIDTH: 73 this.message = "Too narrow by about {2}% (with common fonts)."; 74 break; 75 default: 76 throw new IllegalArgumentException(); 77 } 78 break; 79 case MAXIMUM: 80 switch (measure) { 81 case CODE_POINTS: 82 this.message = "Expected no more than {0} character(s), but was {1}."; 83 this.subtype = Subtype.valueTooWide; 84 break; 85 case DISPLAY_WIDTH: 86 this.message = "Too wide by about {2}% (with common fonts)."; 87 this.subtype = Subtype.valueTooWide; 88 break; 89 case SET_ELEMENTS: 90 this.message = "Expected no more than {0} items(s), but was {1}."; 91 this.subtype = Subtype.tooManyValues; 92 break; 93 default: 94 throw new IllegalArgumentException(); 95 } 96 break; 97 default: 98 throw new IllegalArgumentException(); 99 } 100 } 101 Limit(double d, double e, Measure displayWidth, LimitType maximum, Special placeholders)102 public Limit(double d, double e, Measure displayWidth, LimitType maximum, Special placeholders) { 103 this(d, e, displayWidth, maximum, placeholders, false); 104 } 105 hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprenehsive)106 boolean hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprenehsive) { 107 switch (special) { 108 case NUMBERFORMAT: 109 String[] values = value.split(";", 2); 110 // If it's a number format with positive and negative subpatterns, just check the longer one. 111 value = (values.length == 2 && values[1].length() > values[0].length()) ? values[1] : values[0]; 112 value = value.replace("'", ""); 113 break; 114 case QUOTES: 115 value = value.replace("'", ""); 116 break; 117 case PLACEHOLDERS: 118 value = PLACEHOLDER_PATTERN.matcher(value).replaceAll(""); 119 break; 120 case NUMBERSYMBOLS: 121 value = value.replaceAll("[\u200E\u200F\u061C]", ""); // don't include LRM/RLM/ALM when checking length of number symbols 122 break; 123 case BARS: 124 value = value.replaceAll("[^|]", "")+"|"; // Check the number of items by counting separators. Bit of a hack... 125 break; 126 default: 127 } 128 double valueMeasure = measure == Measure.DISPLAY_WIDTH ? ApproximateWidth.getWidth(value) 129 : value.codePointCount(0, value.length()) ; 130 CheckStatus.Type errorType = CheckStatus.warningType; 131 switch (limit) { 132 case MINIMUM: 133 if (valueMeasure >= warningReference) { 134 return false; 135 } 136 if (valueMeasure < errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) { 137 errorType = CheckStatus.errorType; 138 } 139 break; 140 case MAXIMUM: 141 if (valueMeasure <= warningReference) { 142 return false; 143 } 144 if (valueMeasure > errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) { 145 // Workaround for ST submission phase only per TC discussion 2018-05-30 146 // Make too many keywords be only a warning until we decide policy (JCE) 147 if (cause.getPhase() == Phase.SUBMISSION && measure.equals(Measure.SET_ELEMENTS)) { 148 errorType = CheckStatus.warningType; 149 } else { 150 errorType = CheckStatus.errorType; 151 } 152 } 153 break; 154 } 155 // the 115 is so that we don't show small percentages 156 // the /10 ...*10 is to round to multiples of 10% percent 157 double percent = (int) (Math.abs(115 * valueMeasure / warningReference - 100.0d) / 10 + 0.49999d) * 10; 158 result.add(new CheckStatus().setCause(cause) 159 .setMainType(errorType) 160 .setSubtype(subtype) 161 .setMessage(message, warningReference, valueMeasure, percent)); 162 return true; 163 } 164 } 165 166 // WARNING: errors must occur before warnings!! 167 // we allow unusual units and English units to be a little longer 168 static final String ALLOW_LONGER = "(area-acre" 169 + "|area-square-foot" 170 + "|area-square-mile" 171 + "|length-foot" 172 + "|length-inch" 173 + "|length-mile" 174 + "|length-light-year" 175 + "|length-yard" 176 + "|mass-ounce" 177 + "|mass-pound" 178 + "|power-horsepower" 179 + "|pressure-inch-hg" 180 + "|pressure-millimeter-of-mercury" 181 + "|speed-mile-per-hour" 182 + "|temperature-fahrenheit" 183 + "|volume-cubic-mile" 184 + "|acceleration-g-force" 185 + "|speed-kilometer-per-hour" 186 + "|speed-meter-per-second" 187 + "|pressure-pound-per-square-inch" 188 + "|energy-therm-us" 189 + ")"; 190 191 static final String ALLOW_LONGEST = "consumption-liter-per-100kilometers"; 192 193 static RegexLookup<Limit[]> lookup = new RegexLookup<Limit[]>() 194 .setPatternTransform(RegexLookup.RegexFinderTransformPath) 195 .addVariable("%A", "\"[^\"]+\"") 196 .addVariable("%P", "\"[ap]m\"") 197 .addVariable("%Q", "[^ap].*|[ap][^m].*") // Anything but am or pm 198 .add("//ldml/delimiters/(quotation|alternateQuotation)", new Limit[] { 199 new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NONE) 200 }) 201 202 // Numeric items should be no more than a single character 203 204 .add("//ldml/numbers/symbols[@numberSystem=%A]/(decimal|group|minus|percent|perMille|plus)", new Limit[] { 205 new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NUMBERSYMBOLS) 206 }) 207 208 // Now widths 209 // The following are rough measures, just to check strange cases 210 211 .add("//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]", new Limit[] { 212 new Limit(2 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 213 }) 214 215 .add("//ldml/localeDisplayNames/localeDisplayPattern/", new Limit[] { // {0}: {1}, {0} ({1}), , 216 new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 217 }) 218 219 .add("//ldml/listPatterns/listPattern/listPatternPart[@type=%A]", new Limit[] { // {0} and {1} 220 new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 221 }) 222 223 .add("//ldml/dates/timeZoneNames/fallbackFormat", new Limit[] { // {1} ({0}) 224 new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 225 }) 226 227 .add("//ldml/dates/timeZoneNames/(regionFormat|hourFormat)", new Limit[] { // {0} Time, 228 // +HH:mm;-HH:mm 229 new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 230 }) 231 232 .add("//ldml/dates/timeZoneNames/(gmtFormat|gmtZeroFormat)", new Limit[] { // GMT{0}, GMT 233 new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 234 }) 235 236 // Era Abbreviations 237 238 // Allow longer for Japanese calendar eras 239 .add("//ldml/dates/calendars/calendar[@type=\"japanese\"]/.*/eraAbbr/era[@type=%A]", new Limit[] { 240 new Limit(12 * EM, 16 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 241 }) 242 // Allow longer for ROC calendar eras 243 .add("//ldml/dates/calendars/calendar[@type=\"roc\"]/.*/eraAbbr/era[@type=%A]", new Limit[] { 244 new Limit(4 * EM, 8 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 245 }) 246 .add("//ldml/dates/calendars/calendar.*/eraAbbr/era[@type=%A]", new Limit[] { 247 new Limit(3 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 248 }) 249 250 // am/pm abbreviated 251 .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%P]", new Limit[] { 252 new Limit(4 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 253 }) 254 // other day periods abbreviated 255 .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%Q]", new Limit[] { 256 new Limit(8 * EM, 12 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 257 }) 258 // am/pm wide 259 .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%P]", new Limit[] { 260 new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 261 }) 262 // other day periods wide 263 .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%Q]", new Limit[] { 264 new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 265 }) 266 267 // Narrow items 268 269 .add("//ldml/dates/calendars/calendar.*[@type=\"narrow\"](?!/cyclic|/dayPeriod|/monthPattern)", new Limit[] { 270 new Limit(1.5 * EM, 2.25 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 271 }) 272 // \"(?!am|pm)[^\"]+\"\\ 273 274 // Compact number formats 275 // pattern[@type="100000000000000"] 276 .add("//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"100000000000000", 277 new Limit[] { 278 new Limit(4 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NUMBERFORMAT) 279 }) 280 .add("//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"1", 281 new Limit[] { 282 new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NUMBERFORMAT) 283 }) 284 // Catch -future/past Narrow units and allow much wider values 285 .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"[^\"]+-(future|past)\"]/unitPattern", new Limit[] { 286 new Limit(10 * EM, 15 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 287 }) 288 // Catch widest units and allow a bit wider 289 .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGEST + "\"]/unitPattern", new Limit[] { 290 new Limit(5 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 291 }) 292 // Catch special units and allow a bit wider 293 .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGER + "\"]/unitPattern", new Limit[] { 294 new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 295 }) 296 // Narrow units 297 .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=%A]/unitPattern", new Limit[] { 298 new Limit(3 * EM, 4 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 299 }) 300 // Short units 301 .add("//ldml/units/unitLength[@type=\"short\"]/unit[@type=%A]/unitPattern", new Limit[] { 302 new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 303 }) 304 305 // Currency Symbols 306 .add("//ldml/numbers/currencies/currency[@type=%A]/symbol", new Limit[] { 307 new Limit(3 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 308 }) 309 310 // "grinning cat face with smiling eyes" should be normal max ~= 160 em 311 // emoji names (not keywords) 312 .add("//ldml/annotations/annotation[@cp=%A][@type=%A]", new Limit[] { 313 new Limit(20 * EM, 100 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE), 314 }) 315 .add("//ldml/annotations/annotation[@cp=%A]", new Limit[] { 316 new Limit(WARN_COMPONENTS_PER_ANNOTATION, MAX_COMPONENTS_PER_ANNOTATION, Measure.SET_ELEMENTS, LimitType.MAXIMUM, Special.BARS) // Allow up to 5 with no warning, up to 7 with no error. 317 }) 318 ; 319 320 static { 321 System.out.println("EMs: " + ApproximateWidth.getWidth("grinning cat face with smiling eyes")); 322 } 323 324 Set<Limit> found = new LinkedHashSet<Limit>(); 325 326 @Override handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)327 public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result) { 328 if (value == null) { 329 return this; // skip 330 } 331 // String testPrefix = "//ldml/units/unitLength[@type=\"narrow\"]"; 332 // if (path.startsWith(testPrefix)) { 333 // int i = 0; 334 // } 335 // Limits item0 = 336 // lookup.get("//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000000000\"][@count=\"other\"]"); 337 // item0.check("123456789", result, this); 338 339 Limit[] items = lookup.get(path); 340 CLDRFile.Status status = new CLDRFile.Status(); 341 this.getCldrFileToCheck().getSourceLocaleID(path, status); 342 // This was put in specifically to deal with the fact that we added a bunch of new units in CLDR 26 343 // and didn't put the narrow forms of them into modern coverage. If/when the narrow forms of all units 344 // are modern coverage, then we can safely remove the aliasedAndComprehensive check. Right now if an 345 // item is aliased and coverage is comprehensive, then it can't generate anything worse than a warning. 346 Boolean aliasedAndComprenehsive = (coverageLevel.getLevel(path).compareTo(Level.COMPREHENSIVE) == 0) 347 && (status.pathWhereFound.compareTo(path) != 0); 348 if (items != null) { 349 for (Limit item : items) { 350 if (item.hasProblem(value, result, this, aliasedAndComprenehsive)) { 351 if (DEBUG && !found.contains(item)) { 352 found.add(item); 353 } 354 break; // only one error per item 355 } 356 } 357 } 358 return this; 359 } 360 setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)361 public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, 362 List<CheckStatus> possibleErrors) { 363 final String localeID = cldrFileToCheck.getLocaleID(); 364 supplementalData = SupplementalDataInfo.getInstance(cldrFileToCheck.getSupplementalDirectory()); 365 coverageLevel = CoverageLevel2.getInstance(supplementalData, localeID); 366 367 super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 368 return this; 369 } 370 } 371