• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.test;
2 
3 import java.util.LinkedHashSet;
4 import java.util.List;
5 import java.util.Set;
6 import java.util.regex.Pattern;
7 
8 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
9 import org.unicode.cldr.util.ApproximateWidth;
10 import org.unicode.cldr.util.CLDRFile;
11 import org.unicode.cldr.util.Level;
12 import org.unicode.cldr.util.PatternCache;
13 import org.unicode.cldr.util.RegexLookup;
14 import org.unicode.cldr.util.SupplementalDataInfo;
15 
16 public class CheckWidths extends CheckCLDR {
17     // remember to add this class to the list in CheckCLDR.getCheckAll
18     // to run just this test, on just locales starting with 'nl', use CheckCLDR with -fnl.* -t.*CheckWidths.*
19     private static CoverageLevel2 coverageLevel;
20     private Level requiredLevel;
21 
22     /**
23      * Controls for the warning about too many components, and for when to cause error.
24      */
25     public static final int WARN_COMPONENTS_PER_ANNOTATION = 7;
26     public static final int MAX_COMPONENTS_PER_ANNOTATION = 16;
27 
28     SupplementalDataInfo supplementalData;
29 
30     private static final double EM = ApproximateWidth.getWidth("月");
31 
32     private static final boolean DEBUG = true;
33 
34     private enum Measure {
35         CODE_POINTS, DISPLAY_WIDTH, SET_ELEMENTS
36     }
37 
38     private enum LimitType {
39         MINIMUM, MAXIMUM
40     }
41 
42     private enum Special {
43         NONE, QUOTES, PLACEHOLDERS, NUMBERSYMBOLS, NUMBERFORMAT, BARS
44     }
45 
46     private static final Pattern PLACEHOLDER_PATTERN = PatternCache.get("\\{\\d\\}");
47 
48     private static class Limit {
49         final double warningReference;
50         final double errorReference;
51         final LimitType limit;
52         final Measure measure;
53         final Special special;
54         final String message;
55         final Subtype subtype;
56         final boolean debug;
57 
Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug)58         public Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug) {
59             this.debug = debug;
60             this.warningReference = warningReference;
61             this.errorReference = errorReference;
62             this.limit = limit;
63             this.measure = measure;
64             this.special = special;
65             switch (limit) {
66             case MINIMUM:
67                 this.subtype = Subtype.valueTooNarrow;
68                 switch (measure) {
69                 case CODE_POINTS:
70                     this.message = "Expected no fewer than {0} character(s), but was {1}.";
71                     break;
72                 case DISPLAY_WIDTH:
73                     this.message = "Too narrow by about {2}% (with common fonts).";
74                     break;
75                 default:
76                     throw new IllegalArgumentException();
77                 }
78                 break;
79             case MAXIMUM:
80                 switch (measure) {
81                 case CODE_POINTS:
82                     this.message = "Expected no more than {0} character(s), but was {1}.";
83                     this.subtype = Subtype.valueTooWide;
84                     break;
85                 case DISPLAY_WIDTH:
86                     this.message = "Too wide by about {2}% (with common fonts).";
87                     this.subtype = Subtype.valueTooWide;
88                     break;
89                 case SET_ELEMENTS:
90                     this.message = "Expected no more than {0} items(s), but was {1}.";
91                     this.subtype = Subtype.tooManyValues;
92                     break;
93                 default:
94                     throw new IllegalArgumentException();
95                 }
96                 break;
97             default:
98                 throw new IllegalArgumentException();
99             }
100         }
101 
Limit(double d, double e, Measure displayWidth, LimitType maximum, Special placeholders)102         public Limit(double d, double e, Measure displayWidth, LimitType maximum, Special placeholders) {
103             this(d, e, displayWidth, maximum, placeholders, false);
104         }
105 
hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprenehsive)106         boolean hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprenehsive) {
107             switch (special) {
108             case NUMBERFORMAT:
109                 String[] values = value.split(";", 2);
110                 // If it's a number format with positive and negative subpatterns, just check the longer one.
111                 value = (values.length == 2 && values[1].length() > values[0].length()) ? values[1] : values[0];
112                 value = value.replace("'", "");
113                 break;
114             case QUOTES:
115                 value = value.replace("'", "");
116                 break;
117             case PLACEHOLDERS:
118                 value = PLACEHOLDER_PATTERN.matcher(value).replaceAll("");
119                 break;
120             case NUMBERSYMBOLS:
121                 value = value.replaceAll("[\u200E\u200F\u061C]", ""); // don't include LRM/RLM/ALM when checking length of number symbols
122                 break;
123             case BARS:
124                 value = value.replaceAll("[^|]", "")+"|"; // Check the number of items by counting separators. Bit of a hack...
125                 break;
126             default:
127             }
128             double valueMeasure = measure == Measure.DISPLAY_WIDTH ? ApproximateWidth.getWidth(value)
129                 : value.codePointCount(0, value.length()) ;
130             CheckStatus.Type errorType = CheckStatus.warningType;
131             switch (limit) {
132             case MINIMUM:
133                 if (valueMeasure >= warningReference) {
134                     return false;
135                 }
136                 if (valueMeasure < errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) {
137                     errorType = CheckStatus.errorType;
138                 }
139                 break;
140             case MAXIMUM:
141                 if (valueMeasure <= warningReference) {
142                     return false;
143                 }
144                 if (valueMeasure > errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) {
145                     // Workaround for ST submission phase only per TC discussion 2018-05-30
146                     // Make too many keywords be only a warning until we decide policy (JCE)
147                     if (cause.getPhase() == Phase.SUBMISSION && measure.equals(Measure.SET_ELEMENTS)) {
148                         errorType = CheckStatus.warningType;
149                     } else {
150                         errorType = CheckStatus.errorType;
151                     }
152                 }
153                 break;
154             }
155             // the 115 is so that we don't show small percentages
156             // the /10 ...*10 is to round to multiples of 10% percent
157             double percent = (int) (Math.abs(115 * valueMeasure / warningReference - 100.0d) / 10 + 0.49999d) * 10;
158             result.add(new CheckStatus().setCause(cause)
159                 .setMainType(errorType)
160                 .setSubtype(subtype)
161                 .setMessage(message, warningReference, valueMeasure, percent));
162             return true;
163         }
164     }
165 
166     // WARNING: errors must occur before warnings!!
167     // we allow unusual units and English units to be a little longer
168     static final String ALLOW_LONGER = "(area-acre"
169         + "|area-square-foot"
170         + "|area-square-mile"
171         + "|length-foot"
172         + "|length-inch"
173         + "|length-mile"
174         + "|length-light-year"
175         + "|length-yard"
176         + "|mass-ounce"
177         + "|mass-pound"
178         + "|power-horsepower"
179         + "|pressure-inch-hg"
180         + "|pressure-millimeter-of-mercury"
181         + "|speed-mile-per-hour"
182         + "|temperature-fahrenheit"
183         + "|volume-cubic-mile"
184         + "|acceleration-g-force"
185         + "|speed-kilometer-per-hour"
186         + "|speed-meter-per-second"
187         + "|pressure-pound-per-square-inch"
188         + "|energy-therm-us"
189         + ")";
190 
191     static final String ALLOW_LONGEST = "consumption-liter-per-100kilometers";
192 
193     static RegexLookup<Limit[]> lookup = new RegexLookup<Limit[]>()
194         .setPatternTransform(RegexLookup.RegexFinderTransformPath)
195         .addVariable("%A", "\"[^\"]+\"")
196         .addVariable("%P", "\"[ap]m\"")
197         .addVariable("%Q", "[^ap].*|[ap][^m].*") // Anything but am or pm
198         .add("//ldml/delimiters/(quotation|alternateQuotation)", new Limit[] {
199             new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NONE)
200         })
201 
202         // Numeric items should be no more than a single character
203 
204         .add("//ldml/numbers/symbols[@numberSystem=%A]/(decimal|group|minus|percent|perMille|plus)", new Limit[] {
205             new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NUMBERSYMBOLS)
206         })
207 
208         // Now widths
209         // The following are rough measures, just to check strange cases
210 
211         .add("//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]", new Limit[] {
212             new Limit(2 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
213         })
214 
215         .add("//ldml/localeDisplayNames/localeDisplayPattern/", new Limit[] { // {0}: {1}, {0} ({1}), ,
216             new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
217         })
218 
219         .add("//ldml/listPatterns/listPattern/listPatternPart[@type=%A]", new Limit[] { // {0} and {1}
220             new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
221         })
222 
223         .add("//ldml/dates/timeZoneNames/fallbackFormat", new Limit[] { // {1} ({0})
224             new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
225         })
226 
227         .add("//ldml/dates/timeZoneNames/(regionFormat|hourFormat)", new Limit[] { // {0} Time,
228             // +HH:mm;-HH:mm
229             new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
230         })
231 
232         .add("//ldml/dates/timeZoneNames/(gmtFormat|gmtZeroFormat)", new Limit[] { // GMT{0}, GMT
233             new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
234         })
235 
236         // Era Abbreviations
237 
238         // Allow longer for Japanese calendar eras
239         .add("//ldml/dates/calendars/calendar[@type=\"japanese\"]/.*/eraAbbr/era[@type=%A]", new Limit[] {
240             new Limit(12 * EM, 16 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
241         })
242         // Allow longer for ROC calendar eras
243         .add("//ldml/dates/calendars/calendar[@type=\"roc\"]/.*/eraAbbr/era[@type=%A]", new Limit[] {
244             new Limit(4 * EM, 8 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
245         })
246         .add("//ldml/dates/calendars/calendar.*/eraAbbr/era[@type=%A]", new Limit[] {
247             new Limit(3 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
248         })
249 
250         // am/pm abbreviated
251         .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%P]", new Limit[] {
252             new Limit(4 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
253         })
254         // other day periods abbreviated
255         .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%Q]", new Limit[] {
256             new Limit(8 * EM, 12 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
257         })
258         // am/pm wide
259         .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%P]", new Limit[] {
260             new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
261         })
262         // other day periods wide
263         .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%Q]", new Limit[] {
264             new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
265         })
266 
267         // Narrow items
268 
269         .add("//ldml/dates/calendars/calendar.*[@type=\"narrow\"](?!/cyclic|/dayPeriod|/monthPattern)", new Limit[] {
270             new Limit(1.5 * EM, 2.25 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
271         })
272         // \"(?!am|pm)[^\"]+\"\\
273 
274         // Compact number formats
275 // pattern[@type="100000000000000"]
276         .add("//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"100000000000000",
277             new Limit[] {
278                 new Limit(4 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NUMBERFORMAT)
279         })
280         .add("//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"1",
281             new Limit[] {
282                 new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NUMBERFORMAT)
283         })
284         // Catch -future/past Narrow units  and allow much wider values
285         .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"[^\"]+-(future|past)\"]/unitPattern", new Limit[] {
286             new Limit(10 * EM, 15 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
287         })
288         // Catch widest units and allow a bit wider
289         .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGEST + "\"]/unitPattern", new Limit[] {
290             new Limit(5 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
291         })
292         // Catch special units and allow a bit wider
293         .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGER + "\"]/unitPattern", new Limit[] {
294             new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
295         })
296         // Narrow units
297         .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=%A]/unitPattern", new Limit[] {
298             new Limit(3 * EM, 4 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
299         })
300         // Short units
301         .add("//ldml/units/unitLength[@type=\"short\"]/unit[@type=%A]/unitPattern", new Limit[] {
302             new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
303         })
304 
305         // Currency Symbols
306         .add("//ldml/numbers/currencies/currency[@type=%A]/symbol", new Limit[] {
307             new Limit(3 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
308         })
309 
310         // "grinning cat face with smiling eyes" should be normal max ~= 160 em
311         // emoji names (not keywords)
312         .add("//ldml/annotations/annotation[@cp=%A][@type=%A]", new Limit[] {
313             new Limit(20 * EM, 100 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE),
314         })
315         .add("//ldml/annotations/annotation[@cp=%A]", new Limit[] {
316             new Limit(WARN_COMPONENTS_PER_ANNOTATION, MAX_COMPONENTS_PER_ANNOTATION, Measure.SET_ELEMENTS, LimitType.MAXIMUM, Special.BARS) // Allow up to 5 with no warning, up to 7 with no error.
317         })
318         ;
319 
320     static {
321         System.out.println("EMs: " + ApproximateWidth.getWidth("grinning cat face with smiling eyes"));
322     }
323 
324     Set<Limit> found = new LinkedHashSet<Limit>();
325 
326     @Override
handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)327     public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result) {
328         if (value == null) {
329             return this; // skip
330         }
331         //        String testPrefix = "//ldml/units/unitLength[@type=\"narrow\"]";
332         //        if (path.startsWith(testPrefix)) {
333         //            int i = 0;
334         //        }
335         // Limits item0 =
336         // lookup.get("//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000000000\"][@count=\"other\"]");
337         // item0.check("123456789", result, this);
338 
339         Limit[] items = lookup.get(path);
340         CLDRFile.Status status = new CLDRFile.Status();
341         this.getCldrFileToCheck().getSourceLocaleID(path, status);
342         // This was put in specifically to deal with the fact that we added a bunch of new units in CLDR 26
343         // and didn't put the narrow forms of them into modern coverage.  If/when the narrow forms of all units
344         // are modern coverage, then we can safely remove the aliasedAndComprehensive check.  Right now if an
345         // item is aliased and coverage is comprehensive, then it can't generate anything worse than a warning.
346         Boolean aliasedAndComprenehsive = (coverageLevel.getLevel(path).compareTo(Level.COMPREHENSIVE) == 0)
347             && (status.pathWhereFound.compareTo(path) != 0);
348         if (items != null) {
349             for (Limit item : items) {
350                 if (item.hasProblem(value, result, this, aliasedAndComprenehsive)) {
351                     if (DEBUG && !found.contains(item)) {
352                         found.add(item);
353                     }
354                     break; // only one error per item
355                 }
356             }
357         }
358         return this;
359     }
360 
setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)361     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
362         List<CheckStatus> possibleErrors) {
363         final String localeID = cldrFileToCheck.getLocaleID();
364         supplementalData = SupplementalDataInfo.getInstance(cldrFileToCheck.getSupplementalDirectory());
365         coverageLevel = CoverageLevel2.getInstance(supplementalData, localeID);
366 
367         super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
368         return this;
369     }
370 }
371