1 package org.unicode.cldr.test; 2 3 import java.util.LinkedHashSet; 4 import java.util.List; 5 import java.util.Set; 6 import java.util.regex.Pattern; 7 8 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 9 import org.unicode.cldr.util.ApproximateWidth; 10 import org.unicode.cldr.util.CLDRFile; 11 import org.unicode.cldr.util.Level; 12 import org.unicode.cldr.util.PatternCache; 13 import org.unicode.cldr.util.RegexLookup; 14 import org.unicode.cldr.util.SupplementalDataInfo; 15 16 public class CheckWidths extends CheckCLDR { 17 // remember to add this class to the list in CheckCLDR.getCheckAll 18 // to run just this test, on just locales starting with 'nl', use CheckCLDR with -fnl.* -t.*CheckWidths.* 19 private static CoverageLevel2 coverageLevel; 20 private Level requiredLevel; 21 22 public static final int MAX_COMPONENTS_PER_ANNOTATION = 16; 23 24 SupplementalDataInfo supplementalData; 25 26 private static final double EM = ApproximateWidth.getWidth("月"); 27 28 private static final boolean DEBUG = true; 29 30 private enum Measure { 31 CODE_POINTS, DISPLAY_WIDTH, SET_ELEMENTS 32 } 33 34 private enum LimitType { 35 MINIMUM, MAXIMUM 36 } 37 38 private enum Special { 39 NONE, QUOTES, PLACEHOLDERS, NUMBERSYMBOLS, NUMBERFORMAT, BARS 40 } 41 42 private static final Pattern PLACEHOLDER_PATTERN = PatternCache.get("\\{\\d\\}"); 43 44 private static class Limit { 45 final double warningReference; 46 final double errorReference; 47 final LimitType limit; 48 final Measure measure; 49 final Special special; 50 final String message; 51 final Subtype subtype; 52 final boolean debug; 53 Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug)54 public Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug) { 55 this.debug = debug; 56 this.warningReference = warningReference; 57 this.errorReference = errorReference; 58 this.limit = limit; 59 this.measure = measure; 60 this.special = special; 61 switch (limit) { 62 case MINIMUM: 63 this.subtype = Subtype.valueTooNarrow; 64 switch (measure) { 65 case CODE_POINTS: 66 this.message = "Expected no fewer than {0} character(s), but was {1}."; 67 break; 68 case DISPLAY_WIDTH: 69 this.message = "Too narrow by about {2}% (with common fonts)."; 70 break; 71 default: 72 throw new IllegalArgumentException(); 73 } 74 break; 75 case MAXIMUM: 76 switch (measure) { 77 case CODE_POINTS: 78 this.message = "Expected no more than {0} character(s), but was {1}."; 79 this.subtype = Subtype.valueTooWide; 80 break; 81 case DISPLAY_WIDTH: 82 this.message = "Too wide by about {2}% (with common fonts)."; 83 this.subtype = Subtype.valueTooWide; 84 break; 85 case SET_ELEMENTS: 86 this.message = "Expected no more than {0} items(s), but was {1}."; 87 this.subtype = Subtype.tooManyValues; 88 break; 89 default: 90 throw new IllegalArgumentException(); 91 } 92 break; 93 default: 94 throw new IllegalArgumentException(); 95 } 96 } 97 Limit(double d, double e, Measure displayWidth, LimitType maximum, Special placeholders)98 public Limit(double d, double e, Measure displayWidth, LimitType maximum, Special placeholders) { 99 this(d, e, displayWidth, maximum, placeholders, false); 100 } 101 hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprenehsive)102 boolean hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprenehsive) { 103 switch (special) { 104 case NUMBERFORMAT: 105 String[] values = value.split(";", 2); 106 // If it's a number format with positive and negative subpatterns, just check the longer one. 107 value = (values.length == 2 && values[1].length() > values[0].length()) ? values[1] : values[0]; 108 value = value.replace("'", ""); 109 break; 110 case QUOTES: 111 value = value.replace("'", ""); 112 break; 113 case PLACEHOLDERS: 114 value = PLACEHOLDER_PATTERN.matcher(value).replaceAll(""); 115 break; 116 case NUMBERSYMBOLS: 117 value = value.replaceAll("[\u200E\u200F\u061C]", ""); // don't include LRM/RLM/ALM when checking length of number symbols 118 break; 119 case BARS: 120 value = value.replaceAll("[^|]", "")+"|"; // Check the number of items by counting separators. Bit of a hack... 121 break; 122 default: 123 } 124 double valueMeasure = measure == Measure.DISPLAY_WIDTH ? ApproximateWidth.getWidth(value) 125 : value.codePointCount(0, value.length()) ; 126 CheckStatus.Type errorType = CheckStatus.warningType; 127 switch (limit) { 128 case MINIMUM: 129 if (valueMeasure >= warningReference) { 130 return false; 131 } 132 if (valueMeasure < errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) { 133 errorType = CheckStatus.errorType; 134 } 135 break; 136 case MAXIMUM: 137 if (valueMeasure <= warningReference) { 138 return false; 139 } 140 if (valueMeasure > errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) { 141 // Workaround for ST submission phase only per TC discussion 2018-05-30 142 // Make too many keywords be only a warning until we decide policy (JCE) 143 if (cause.getPhase() == Phase.SUBMISSION && measure.equals(Measure.SET_ELEMENTS)) { 144 errorType = CheckStatus.warningType; 145 } else { 146 errorType = CheckStatus.errorType; 147 } 148 } 149 break; 150 } 151 // the 115 is so that we don't show small percentages 152 // the /10 ...*10 is to round to multiples of 10% percent 153 double percent = (int) (Math.abs(115 * valueMeasure / warningReference - 100.0d) / 10 + 0.49999d) * 10; 154 result.add(new CheckStatus().setCause(cause) 155 .setMainType(errorType) 156 .setSubtype(subtype) 157 .setMessage(message, warningReference, valueMeasure, percent)); 158 return true; 159 } 160 } 161 162 // WARNING: errors must occur before warnings!! 163 // we allow unusual units and English units to be a little longer 164 static final String ALLOW_LONGER = "(area-acre" + 165 "|area-square-foot" + 166 "|area-square-mile" + 167 "|length-foot" + 168 "|length-inch" + 169 "|length-mile" + 170 "|length-light-year" + 171 "|length-yard" + 172 "|mass-ounce" + 173 "|mass-pound" + 174 "|power-horsepower" + 175 "|pressure-inch-hg" + 176 "|pressure-millimeter-of-mercury" + 177 "|speed-mile-per-hour" + 178 "|temperature-fahrenheit" + 179 "|volume-cubic-mile" + 180 "|acceleration-g-force" + 181 "|speed-kilometer-per-hour" + 182 "|speed-meter-per-second" + 183 "|pressure-pound-per-square-inch" + 184 ")"; 185 186 static final String ALLOW_LONGEST = "consumption-liter-per-100kilometers"; 187 188 static RegexLookup<Limit[]> lookup = new RegexLookup<Limit[]>() 189 .setPatternTransform(RegexLookup.RegexFinderTransformPath) 190 .addVariable("%A", "\"[^\"]+\"") 191 .addVariable("%P", "\"[ap]m\"") 192 .addVariable("%Q", "[^ap].*|[ap][^m].*") // Anything but am or pm 193 .add("//ldml/delimiters/(quotation|alternateQuotation)", new Limit[] { 194 new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NONE) 195 }) 196 197 // Numeric items should be no more than a single character 198 199 .add("//ldml/numbers/symbols[@numberSystem=%A]/(decimal|group|minus|percent|perMille|plus)", new Limit[] { 200 new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NUMBERSYMBOLS) 201 }) 202 203 // Now widths 204 // The following are rough measures, just to check strange cases 205 206 .add("//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]", new Limit[] { 207 new Limit(2 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 208 }) 209 210 .add("//ldml/localeDisplayNames/localeDisplayPattern/", new Limit[] { // {0}: {1}, {0} ({1}), , 211 new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 212 }) 213 214 .add("//ldml/listPatterns/listPattern/listPatternPart[@type=%A]", new Limit[] { // {0} and {1} 215 new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 216 }) 217 218 .add("//ldml/dates/timeZoneNames/fallbackFormat", new Limit[] { // {1} ({0}) 219 new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 220 }) 221 222 .add("//ldml/dates/timeZoneNames/(regionFormat|hourFormat)", new Limit[] { // {0} Time, 223 // +HH:mm;-HH:mm 224 new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 225 }) 226 227 .add("//ldml/dates/timeZoneNames/(gmtFormat|gmtZeroFormat)", new Limit[] { // GMT{0}, GMT 228 new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 229 }) 230 231 // Era Abbreviations 232 233 // Allow longer for Japanese calendar eras 234 .add("//ldml/dates/calendars/calendar[@type=\"japanese\"]/.*/eraAbbr/era[@type=%A]", new Limit[] { 235 new Limit(12 * EM, 16 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 236 }) 237 // Allow longer for ROC calendar eras 238 .add("//ldml/dates/calendars/calendar[@type=\"roc\"]/.*/eraAbbr/era[@type=%A]", new Limit[] { 239 new Limit(4 * EM, 8 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 240 }) 241 .add("//ldml/dates/calendars/calendar.*/eraAbbr/era[@type=%A]", new Limit[] { 242 new Limit(3 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 243 }) 244 245 // am/pm abbreviated 246 .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%P]", new Limit[] { 247 new Limit(4 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 248 }) 249 // other day periods abbreviated 250 .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%Q]", new Limit[] { 251 new Limit(8 * EM, 12 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 252 }) 253 // am/pm wide 254 .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%P]", new Limit[] { 255 new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 256 }) 257 // other day periods wide 258 .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%Q]", new Limit[] { 259 new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 260 }) 261 262 // Narrow items 263 264 .add("//ldml/dates/calendars/calendar.*[@type=\"narrow\"](?!/cyclic|/dayPeriod|/monthPattern)", new Limit[] { 265 new Limit(1.5 * EM, 2.25 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE) 266 }) 267 // \"(?!am|pm)[^\"]+\"\\ 268 269 // Compact number formats 270 271 .add("//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"1", 272 new Limit[] { 273 new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NUMBERFORMAT) 274 }) 275 // Catch -future/past Narrow units and allow much wider values 276 .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"[^\"]+-(future|past)\"]/unitPattern", new Limit[] { 277 new Limit(10 * EM, 15 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 278 }) 279 // Catch widest units and allow a bit wider 280 .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGEST + "\"]/unitPattern", new Limit[] { 281 new Limit(5 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 282 }) 283 // Catch special units and allow a bit wider 284 .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGER + "\"]/unitPattern", new Limit[] { 285 new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 286 }) 287 // Narrow units 288 .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=%A]/unitPattern", new Limit[] { 289 new Limit(3 * EM, 4 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 290 }) 291 // Short units 292 .add("//ldml/units/unitLength[@type=\"short\"]/unit[@type=%A]/unitPattern", new Limit[] { 293 new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 294 }) 295 296 // Currency Symbols 297 .add("//ldml/numbers/currencies/currency[@type=%A]/symbol", new Limit[] { 298 new Limit(3 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS) 299 }) 300 301 // "grinning cat face with smiling eyes" should be normal max ~= 160 em 302 // emoji names (not keywords) 303 .add("//ldml/annotations/annotation[@cp=%A][@type=%A]", new Limit[] { 304 new Limit(20 * EM, 100 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE), 305 }) 306 .add("//ldml/annotations/annotation[@cp=%A]", new Limit[] { 307 new Limit(5, MAX_COMPONENTS_PER_ANNOTATION, Measure.SET_ELEMENTS, LimitType.MAXIMUM, Special.BARS) // Allow up to 5 with no warning, up to 7 with no error. 308 }) 309 ; 310 311 static { 312 System.out.println("EMs: " + ApproximateWidth.getWidth("grinning cat face with smiling eyes")); 313 } 314 315 Set<Limit> found = new LinkedHashSet<Limit>(); 316 317 @Override handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)318 public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result) { 319 if (value == null) { 320 return this; // skip 321 } 322 // String testPrefix = "//ldml/units/unitLength[@type=\"narrow\"]"; 323 // if (path.startsWith(testPrefix)) { 324 // int i = 0; 325 // } 326 // Limits item0 = 327 // lookup.get("//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000000000\"][@count=\"other\"]"); 328 // item0.check("123456789", result, this); 329 330 Limit[] items = lookup.get(path); 331 CLDRFile.Status status = new CLDRFile.Status(); 332 this.getCldrFileToCheck().getSourceLocaleID(path, status); 333 // This was put in specifically to deal with the fact that we added a bunch of new units in CLDR 26 334 // and didn't put the narrow forms of them into modern coverage. If/when the narrow forms of all units 335 // are modern coverage, then we can safely remove the aliasedAndComprehensive check. Right now if an 336 // item is aliased and coverage is comprehensive, then it can't generate anything worse than a warning. 337 Boolean aliasedAndComprenehsive = (coverageLevel.getLevel(path).compareTo(Level.COMPREHENSIVE) == 0) 338 && (status.pathWhereFound.compareTo(path) != 0); 339 if (items != null) { 340 for (Limit item : items) { 341 if (item.hasProblem(value, result, this, aliasedAndComprenehsive)) { 342 if (DEBUG && !found.contains(item)) { 343 found.add(item); 344 } 345 break; // only one error per item 346 } 347 } 348 } 349 return this; 350 } 351 setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)352 public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, 353 List<CheckStatus> possibleErrors) { 354 final String localeID = cldrFileToCheck.getLocaleID(); 355 supplementalData = SupplementalDataInfo.getInstance(cldrFileToCheck.getSupplementalDirectory()); 356 coverageLevel = CoverageLevel2.getInstance(supplementalData, localeID); 357 358 super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 359 return this; 360 } 361 } 362