/*
 ******************************************************************************
 * Copyright (C) 2005-2012, International Business Machines Corporation and        *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
7 package org.unicode.cldr.test;
8 
9 import java.util.BitSet;
10 import java.util.Date;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.LinkedHashSet;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 
20 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
21 import org.unicode.cldr.util.CLDRFile;
22 import org.unicode.cldr.util.CLDRFile.Status;
23 import org.unicode.cldr.util.Factory;
24 import org.unicode.cldr.util.InternalCldrException;
25 import org.unicode.cldr.util.LocaleIDParser;
26 import org.unicode.cldr.util.PatternCache;
27 import org.unicode.cldr.util.PatternPlaceholders;
28 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;
29 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
30 import org.unicode.cldr.util.SupplementalDataInfo;
31 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
32 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
33 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
34 import org.unicode.cldr.util.UnicodeSetPrettyPrinter;
35 import org.unicode.cldr.util.XMLSource;
36 import org.unicode.cldr.util.XPathParts;
37 
38 import com.google.common.base.Joiner;
39 import com.google.common.collect.Multiset;
40 import com.google.common.collect.Multiset.Entry;
41 import com.google.common.collect.TreeMultiset;
42 import com.ibm.icu.impl.Relation;
43 import com.ibm.icu.lang.UScript;
44 import com.ibm.icu.text.Collator;
45 import com.ibm.icu.text.DateTimePatternGenerator;
46 import com.ibm.icu.text.Normalizer2;
47 import com.ibm.icu.text.PluralRules;
48 import com.ibm.icu.text.Transform;
49 import com.ibm.icu.text.UnicodeSet;
50 import com.ibm.icu.util.ULocale;
51 
52 public class CheckForExemplars extends FactoryCheckCLDR {
53     private static final UnicodeSet RTL_CONTROLS = new UnicodeSet("[\\u061C\\u200E\\u200F\\u202A-\\u202D\\u2066-\\u2069]");
54 
55     private static final UnicodeSet RTL = new UnicodeSet("[[:bc=AL:][:bc=R:]]");
56 
57     private static final String STAND_IN = "#";
58 
59     // private final UnicodeSet commonAndInherited = new UnicodeSet(CheckExemplars.Allowed).complement();
60     // "[[:script=common:][:script=inherited:][:alphabetic=false:]]");
61     static String[] EXEMPLAR_SKIPS = {
62         "/currencySpacing",
63         "/exemplarCharacters",
64         // "/pattern",
65         "/localizedPatternChars",
66         "/segmentations",
67         "/references",
68         "/localeDisplayNames/variants/",
69         "/commonlyUsed",
70         "/defaultNumberingSystem",
71         "/otherNumberingSystems",
72         "/exponential",
73         "/nan",
74         "/scientificFormats",
75         "/inText",
76         "/orientation",
77         "/symbol[@alt=\"narrow\"]",
78         "/characters/parseLenients"
79     };
80 
81     static String[] DATE_PARTS = {
82         "/hourFormat",
83         "/dateFormatItem",
84         "/intervalFormatItem",
85         "/dateFormatLength",
86         "timeFormatLength"
87     };
88 
89     static final UnicodeSet START_PAREN = new UnicodeSet("[[:Ps:]]").freeze();
90     static final UnicodeSet END_PAREN = new UnicodeSet("[[:Pe:]]").freeze();
91     static final UnicodeSet ALL_CURRENCY_SYMBOLS = new UnicodeSet("[[:Sc:]]").freeze();
92     static final UnicodeSet LETTER = new UnicodeSet("[[A-Za-z]]").freeze();
93     static final UnicodeSet NUMBERS = new UnicodeSet("[[:N:]]").freeze();
94     static final UnicodeSet DISALLOWED_HOUR_FORMAT = new UnicodeSet("[[:letter:]]").remove('H').remove('m').freeze();
95     static final UnicodeSet DISALLOWED_IN_RANGE = new UnicodeSet("[:L:]").freeze();
96 
97     private UnicodeSet exemplars;
98     private UnicodeSet exemplarsPlusAscii;
99     //private static final UnicodeSet DISALLOWED_IN_scriptRegionExemplars = new UnicodeSet("[()();,;,]").freeze();
100     //private static final UnicodeSet DISALLOWED_IN_scriptRegionExemplarsWithParens = new UnicodeSet("[;,;,]").freeze();
101 
102     // Hack until cldrbug 6566 is fixed. TODO
103     private static final Pattern IGNORE_PLACEHOLDER_PARENTHESES = PatternCache.get("\\p{Ps}#\\p{Pe}");
104 
105     // private UnicodeSet currencySymbolExemplars;
106     private boolean skip;
107     private Collator col;
108     private Collator spaceCol;
109     UnicodeSetPrettyPrinter prettyPrint;
110     private Status otherPathStatus = new Status();
111     private Matcher patternMatcher = ExampleGenerator.PARAMETER.matcher("");
112     private boolean errorDefaultOption;
113 
114     // for extracting date pattern text
115     private DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
116     StringBuilder justText = new StringBuilder();
117 
118     // public static final Pattern SUPPOSED_TO_BE_MESSAGE_FORMAT_PATTERN = PatternCache.get("/(" +
119     // "codePattern" +
120     // "|dateRangePattern" +
121     // "|dateTimeFormat[^/]*?/pattern" +
122     // "|appendItem" +
123     // "|intervalFormatFallback" +
124     // "|hoursFormat" +
125     // "|gmtFormat" +
126     // "|regionFormat" +
127     // "|fallbackFormat" +
128     // "|unitPattern.*@count=\"(zero|one|two|few|many|other)\"" +
129     // "|localePattern" +
130     // "|localeKeyTypePattern" +
131     // "|listPatternPart" +
132     // "|ellipsis" +
133     // "|monthPattern" +
134     // ")");
135     // private Matcher supposedToBeMessageFormat = SUPPOSED_TO_BE_MESSAGE_FORMAT_PATTERN.matcher("");
136 
137     public static final Pattern LEAD_OR_TRAIL_WHITESPACE_OK = PatternCache.get("/(" +
138         "references/reference" +
139         "|insertBetween" +
140         ")");
141     private Matcher leadOrTrailWhitespaceOk = LEAD_OR_TRAIL_WHITESPACE_OK.matcher("");
142 
143     private static UnicodeSet ASCII = new UnicodeSet("[\\u0020-\\u007F]").freeze();
144 
145     private PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance();
146     private SupplementalDataInfo sdi;
147     private Relation scriptToCurrencies;
148 
CheckForExemplars(Factory factory)149     public CheckForExemplars(Factory factory) {
150         super(factory);
151         // patternPlaceholders = RegexLookup.of(new PlaceholderTransform())
152         // .loadFromFile(PatternPlaceholders.class, "data/Placeholders.txt");
153         sdi = SupplementalDataInfo.getInstance();
154     }
155 
156     /**
157      * Adapted from GenerateXMB.MapTransform
158      *
159      * @author jchye
160      *
161      */
162     static class PlaceholderTransform implements Transform<String, Set<String>> {
163         @Override
transform(String source)164         public Set<String> transform(String source) {
165             Set<String> placeholders = new LinkedHashSet<>();
166             String[] parts = source.split(";\\s+");
167             for (String part : parts) {
168                 int equalsPos = part.indexOf('=');
169                 String placeholder = part.substring(0, equalsPos).trim();
170                 placeholders.add(placeholder);
171             }
172             return placeholders;
173         }
174     }
175 
176     @Override
setCldrFileToCheck(CLDRFile cldrFile, Options options, List<CheckStatus> possibleErrors)177     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFile, Options options, List<CheckStatus> possibleErrors) {
178         if (cldrFile == null) return this;
179         skip = true;
180         super.setCldrFileToCheck(cldrFile, options, possibleErrors);
181         if (cldrFile.getLocaleID().equals("root")) {
182             return this;
183         }
184 
185         errorDefaultOption = options.get(Options.Option.exemplarErrors) != null;
186 
187         String locale = cldrFile.getLocaleID();
188         col = Collator.getInstance(new ULocale(locale));
189         spaceCol = Collator.getInstance(new ULocale(locale));
190         spaceCol.setStrength(Collator.PRIMARY);
191 
192         CLDRFile resolvedFile = getResolvedCldrFileToCheck();
193         boolean[] ok = new boolean[1];
194         exemplars = safeGetExemplars("", possibleErrors, resolvedFile, ok);
195 
196         if (exemplars == null) {
197             CheckStatus item = new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
198                 .setSubtype(Subtype.noExemplarCharacters)
199                 .setMessage("No Exemplar Characters: {0}", new Object[] { this.getClass().getName() });
200             possibleErrors.add(item);
201             return this;
202         } else if (!ok[0]) {
203             exemplars = new UnicodeSet();
204         } else {
205             exemplars = new UnicodeSet(exemplars); // modifiable copy
206         }
207 
208         boolean isRTL = RTL.containsSome(exemplars);
209         if (isRTL) {
210             exemplars.addAll(RTL_CONTROLS);
211         }
212         // UnicodeSet temp = resolvedFile.getExemplarSet("standard");
213         // if (temp != null) exemplars.addAll(temp);
214         UnicodeSet auxiliary = safeGetExemplars("auxiliary", possibleErrors, resolvedFile, ok); // resolvedFile.getExemplarSet("auxiliary",
215         // CLDRFile.WinningChoice.WINNING);
216         if (auxiliary != null) {
217             exemplars.addAll(auxiliary);
218         }
219 
220         if (CheckExemplars.USE_PUNCTUATION) {
221             UnicodeSet punctuation = safeGetExemplars("punctuation", possibleErrors, resolvedFile, ok); // resolvedFile.getExemplarSet("auxiliary",
222             if (punctuation != null) {
223                 exemplars.addAll(punctuation);
224             }
225 
226             UnicodeSet numbers = getNumberSystemExemplars();
227             exemplars.addAll(numbers);
228 
229             // TODO fix replacement character
230             exemplars.add(STAND_IN);
231         }
232 
233         exemplars.addAll(CheckExemplars.AlwaysOK).freeze();
234         exemplarsPlusAscii = new UnicodeSet(exemplars).addAll(ASCII).freeze();
235 
236         skip = false;
237         prettyPrint = new UnicodeSetPrettyPrinter()
238             .setOrdering(col != null ? col : Collator.getInstance(ULocale.ROOT))
239             .setSpaceComparator(col != null ? col : Collator.getInstance(ULocale.ROOT)
240                 .setStrength2(Collator.PRIMARY))
241             .setCompressRanges(true);
242         return this;
243     }
244 
getNumberSystemExemplars()245     private UnicodeSet getNumberSystemExemplars() {
246         String numberSystem = getCldrFileToCheck().getStringValue("//ldml/numbers/defaultNumberingSystem");
247         String digits = sdi.getDigits(numberSystem);
248         return new UnicodeSet().addAll(digits);
249     }
250 
safeGetExemplars(String type, List<CheckStatus> possibleErrors, CLDRFile resolvedFile, boolean[] ok)251     private UnicodeSet safeGetExemplars(String type, List<CheckStatus> possibleErrors, CLDRFile resolvedFile,
252         boolean[] ok) {
253         UnicodeSet result = null;
254         try {
255             result = resolvedFile.getExemplarSet(type, CLDRFile.WinningChoice.WINNING);
256             ok[0] = true;
257         } catch (IllegalArgumentException iae) {
258             possibleErrors.add(new CheckStatus()
259                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.couldNotAccessExemplars)
260                 .setMessage("Could not get exemplar set: " + iae.toString()));
261             ok[0] = false;
262         }
263         return result;
264     }
265 
266     @Override
handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)267     public CheckCLDR handleCheck(String path, String fullPath, String value,
268         Options options, List<CheckStatus> result) {
269         if (fullPath == null) return this; // skip paths that we don't have
270         if (value == null) return this; // skip values that we don't have ?
271         if (skip) return this;
272         if (path == null) {
273             throw new InternalCldrException("Empty path!");
274         } else if (getCldrFileToCheck() == null) {
275             throw new InternalCldrException("no file to check!");
276         }
277         String sourceLocale = getResolvedCldrFileToCheck().getSourceLocaleID(path, otherPathStatus);
278 
279         // if we are an alias to another path, then skip
280         // if (!path.equals(otherPathStatus.pathWhereFound)) {
281         // return this;
282         // }
283 
284         // now check locale source
285         if (XMLSource.CODE_FALLBACK_ID.equals(sourceLocale)) {
286             return this;
287             // } else if ("root".equals(sourceLocale)) {
288             // // skip eras for non-gregorian
289             // if (true) return this;
290             // if (path.indexOf("/calendar") >= 0 && path.indexOf("gregorian") <= 0) return this;
291         }
292 
293         if (containsPart(path, EXEMPLAR_SKIPS)) {
294             return this;
295         }
296 
297         CheckStatus.Type errorOption = errorDefaultOption & sourceLocale.equals(getResolvedCldrFileToCheck().getLocaleID())
298             ? CheckStatus.errorType : CheckStatus.warningType;
299 
300         value = checkAndReplacePlaceholders(path, value, result);
301         if (path.startsWith("//ldml/numbers/miscPatterns") && path.contains("[@type=\"range\"]")) {
302             if (DISALLOWED_IN_RANGE.containsSome(value)) {
303                 result
304                 .add(new CheckStatus()
305                     .setCause(this)
306                     .setMainType(CheckStatus.errorType)
307                     .setSubtype(Subtype.illegalCharactersInPattern)
308                     .setMessage(
309                         "Range patterns should not have letters.",
310                         new Object[] {}));
311             }
312         }
313         // Now handle date patterns.
314         if (containsPart(path, DATE_PARTS)) {
315             if (!extractDatePatternText(value, STAND_IN, justText)) {
316                 return this; // we are done, no text.
317             }
318             value = justText.toString();
319             if (NUMBERS.containsSome(value)) {
320                 UnicodeSet disallowed = new UnicodeSet().addAll(value).retainAll(NUMBERS);
321                 addMissingMessage(disallowed, CheckStatus.errorType,
322                     Subtype.patternCannotContainDigits,
323                     Subtype.patternCannotContainDigits,
324                     "cannot occur in date or time patterns", result);
325             }
326             if (path.endsWith("/hourFormat")) {
327                 UnicodeSet disallowed = new UnicodeSet().addAll(value)
328                     .retainAll(DISALLOWED_HOUR_FORMAT);
329                 if (!disallowed.isEmpty()) {
330                     addMissingMessage(disallowed, CheckStatus.errorType,
331                         Subtype.patternContainsInvalidCharacters,
332                         Subtype.patternContainsInvalidCharacters,
333                         "cannot occur in the hour format", result);
334                 }
335             }
336         }
337 
338         if (path.startsWith("//ldml/posix/messages")) return this;
339 
340         UnicodeSet disallowed;
341 
342         if (path.contains("/currency") && path.contains("/symbol")) {
343             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
344                 disallowed.removeAll(ALL_CURRENCY_SYMBOLS);
345                 disallowed.removeAll(LETTER); // Allow ASCII A-Z in currency symbols
346                 if (disallowed.size() > 0) {
347                     // && asciiNotAllowed(getCldrFileToCheck().getLocaleID(), currency)) {
348                     addMissingMessage(disallowed, errorOption,
349                         Subtype.charactersNotInMainOrAuxiliaryExemplars,
350                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters",
351                         result);
352                 }
353             }
354         } else if (path.contains("/gmtFormat") || path.contains("/gmtZeroFormat")) {
355             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
356                 disallowed.removeAll(LETTER); // Allow ASCII A-Z in gmtFormat and gmtZeroFormat
357                 if (disallowed.size() > 0) {
358                     addMissingMessage(disallowed, errorOption,
359                         Subtype.charactersNotInMainOrAuxiliaryExemplars,
360                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters",
361                         result);
362                 }
363             }
364         } else if (path.contains("/months") || path.contains("/quarters")) {
365             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
366                 disallowed.removeAll("IVXivx"); // Allow Roman-numeral letters in month or quarter names
367                 if (path.contains("/calendar[@type=\"generic\"]/months")) {
368                     disallowed.removeAll("M"); // Generic-calendar month names contain 'M' and do not get modified
369                 }
370                 if (disallowed.size() > 0) {
371                     addMissingMessage(disallowed, errorOption,
372                         Subtype.charactersNotInMainOrAuxiliaryExemplars,
373                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters",
374                         result);
375                 }
376             }
377         } else if (path.contains("/localeDisplayNames") && !path.contains("/localeDisplayPattern")) {
378             // test first for outside of the set.
379             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
380                 if (path.contains("[@type=\"iso8601\"]")) {
381                     disallowed.removeAll("ISO"); // Name of ISO8601 calendar may contain "ISO" regardless of native script
382                 }
383                 if (disallowed.size() > 0) {
384                     addMissingMessage(disallowed, errorOption, Subtype.charactersNotInMainOrAuxiliaryExemplars,
385                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
386                 }
387             }
388             if (path.contains("/codePatterns")) {
389                 disallowed = new UnicodeSet().addAll(value).retainAll(NUMBERS);
390                 if (!disallowed.isEmpty()) {
391                     addMissingMessage(disallowed, CheckStatus.errorType,
392                         Subtype.patternCannotContainDigits,
393                         Subtype.patternCannotContainDigits,
394                         "cannot occur in locale fields", result);
395                 }
396             }
397         } else if (path.contains("/units")) {
398             String noValidParentheses = IGNORE_PLACEHOLDER_PARENTHESES.matcher(value).replaceAll("");
399             disallowed = new UnicodeSet().addAll(START_PAREN).addAll(END_PAREN)
400                 .retainAll(noValidParentheses);
401             if (!disallowed.isEmpty()) {
402                 addMissingMessage(disallowed, CheckStatus.errorType,
403                     Subtype.parenthesesNotAllowed,
404                     Subtype.parenthesesNotAllowed,
405                     "cannot occur in units", result);
406             }
407         } else if (path.endsWith("/exemplarCity")) {
408             disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
409             if (disallowed != null) {
410                 if ("root".equals(sourceLocale)) {
411                     return this;
412                 }
413                 // Get script of locale.
414                 LocaleIDParser parser = new LocaleIDParser().set(sourceLocale);
415                 String script = parser.getScript();
416                 if (script.length() == 0) {
417                     String localeID = sdi.getLikelySubtags().get(sourceLocale);
418                     if (localeID == null) {
419                         localeID = sdi.getLikelySubtags().get(parser.getLanguage());
420                         if (localeID == null) {
421                             throw new IllegalArgumentException(
422                                 "A likely subtag for " + parser.getLanguage() +
423                                 " is required to get its script.");
424                         }
425                     }
426                     script = parser.set(localeID).getScript();
427                 }
428                 int myscript = UScript.getCodeFromName(script);
429                 UnicodeSet toRemove = new UnicodeSet();
430                 for (int i = 0; i < disallowed.size(); i++) {
431                     int c = disallowed.charAt(i);
432                     if (UScript.getScript(c) == myscript) {
433                         toRemove.add(c);
434                     }
435                 }
436                 disallowed.removeAll(toRemove);
437                 if (disallowed.size() > 0) {
438                     addMissingMessage(disallowed, errorOption, Subtype.charactersNotInMainOrAuxiliaryExemplars,
439                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
440                 }
441             }
442         } else if (path.contains("/annotations") && !path.contains("[@type")) {
443             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
444                 addMissingMessage(disallowed, CheckStatus.warningType, Subtype.charactersNotInMainOrAuxiliaryExemplars,
445                     Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
446             }
447         } else {
448             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
449                 addMissingMessage(disallowed, errorOption, Subtype.charactersNotInMainOrAuxiliaryExemplars,
450                     Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
451             }
452         }
453 
454         // check for spaces
455 
456         if (!value.equals(value.trim())) {
457             if (!leadOrTrailWhitespaceOk.reset(path).find()) {
458                 result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
459                     .setSubtype(Subtype.mustNotStartOrEndWithSpace)
460                     .setMessage("This item must not start or end with whitespace, or be empty."));
461             }
462         }
463         // if (value.contains("  ")) {
464         // result.add(new
465         // CheckStatus().setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.mustNotStartOrEndWithSpace)
466         // .setMessage("This item must not contain two space characters in a row."));
467         // }
468         return this;
469     }
470 
checkAndReplacePlaceholders(String path, String value, List<CheckStatus> result)471     private String checkAndReplacePlaceholders(String path, String value, List<CheckStatus> result) {
472         CheckStatus.Type statusType = getPhase() == Phase.BUILD ? CheckStatus.warningType : CheckStatus.errorType; // new errors, so get past the tests.
473 
474         // Get information about what should be there
475         PlaceholderStatus placeholderStatus = patternPlaceholders.getStatus(path);
476         Map<String, PlaceholderInfo> placeholderInfo = patternPlaceholders.get(path);
477 
478         int minimum = placeholderInfo.size();
479         int maximum = placeholderInfo.size();
480 
481         if (placeholderStatus == PlaceholderStatus.LOCALE_DEPENDENT || placeholderStatus == PlaceholderStatus.MULTIPLE) {
482             // if locale dependent, it is because of count= or ordinal=. Figure out what the values are, and whether we are allowed to have none or one
483             PluralRules rules = PluralRules.forLocale(new ULocale(getCldrFileToCheck().getLocaleID()));
484             if (rules != null) {
485                 XPathParts parts = XPathParts.getFrozenInstance(path);
486                 String keyword = parts.getAttributeValue(-1, "count");
487                 if (keyword == null) {
488                     keyword = parts.getAttributeValue(-1, "ordinal");
489                 }
490                 try {
491                     if (rules.getUniqueKeywordValue(keyword) != PluralRules.NO_UNIQUE_VALUE) {
492                         minimum = 0;
493                     }
494                 } catch (Exception e) {
495                     // internal error, skip
496                 }
497             }
498         }
499 
500         // TODO: move these tests to CheckPlaceholder
501 
502         // Now see what is there, and see if they match
503         Matcher matcher = patternMatcher.reset(value);
504         Multiset<String> matchList = TreeMultiset.create(); // Look for duplicate values.
505         while (matcher.find()) {
506             matchList.add(matcher.group());
507         }
508         final Set<String> distinctPlaceholders = matchList.elementSet();
509         int countDistinctPlaceholders = distinctPlaceholders.size();
510 
511         if (countDistinctPlaceholders > 0) {
512             // Verify that all placeholders are monotonically increasing from zero.
513             int expected = 0;
514             for (String element : distinctPlaceholders) {
515                 // int elementValue = Integer.parseInt(element, 1, element.length()-1, 10);
516                 int elementValue = Integer.parseInt(element.substring(1, element.length()-1), 10);
517                 if (elementValue != expected) {
518                     result.add(new CheckStatus().setCause(this).setMainType(statusType)
519                         .setSubtype(Subtype.gapsInPlaceholderNumbers)
520                         .setMessage("Placeholders {0} should be strictly increasing, starting at zero.", distinctPlaceholders));
521                     break;
522                 }
523                 ++expected;
524             }
525         }
526 
527         // Check if duplicates are allowed
528         if (matchList.size() > countDistinctPlaceholders && placeholderStatus != PlaceholderStatus.MULTIPLE) {
529             Set<String> errors = new LinkedHashSet<>();
530             for (Entry<String> entry : matchList.entrySet()) {
531                 if (entry.getCount() > 1) {
532                     errors.add(entry.getElement());
533                 }
534             }
535             result.add(new CheckStatus().setCause(this).setMainType(statusType)
536                 .setSubtype(Subtype.duplicatePlaceholders)
537                 .setMessage("Duplicate placeholders: {0}.", Joiner.on(", ").join(errors)));
538         }
539 
540         // Now see if the number we have is within bounds
541 
542         if (countDistinctPlaceholders < minimum) {
543             result.add(new CheckStatus().setCause(this).setMainType(statusType)
544                 .setSubtype(Subtype.missingPlaceholders)
545                 .setMessage("Need at least {0} placeholder(s), but only have {1}. Placeholders are: {2}", minimum, countDistinctPlaceholders, placeholderInfo));
546         } else {
547             if (countDistinctPlaceholders > maximum) {
548                 result.add(new CheckStatus().setCause(this).setMainType(statusType)
549                     .setSubtype(Subtype.extraPlaceholders)
550                     .setMessage("Need no more than {0} placeholders, but have too many with {1}.", countDistinctPlaceholders, minimum));
551             }
552         }
553         // Return the pattern with placeholders replaced
554         return matchList.isEmpty() ? value : patternMatcher.replaceAll(STAND_IN);
555     }
556 
557     /**
558      * Checks if ASCII characters are allowed in a currency symbol in the specified locale.
559      * @param localeID the locale ID that the currency is in
560      * @param currency the currency to be checked
561      * @return true if ASCII is not allowed
562      */
asciiNotAllowed(String localeID, String currency)563     private boolean asciiNotAllowed(String localeID, String currency) {
564         // Don't allow ascii at all for bidi scripts.
565         String charOrientation = getResolvedCldrFileToCheck().getStringValue(
566             "//ldml/layout/orientation/characterOrder");
567         if (charOrientation.equals("right-to-left")) {
568             return true;
569         }
570 
571         // Get script of locale. if Latn, quit.
572         LocaleIDParser parser = new LocaleIDParser().set(localeID);
573         String script = parser.getScript();
574         if (script.length() == 0) {
575             localeID = sdi.getLikelySubtags().get(localeID);
576             if (localeID == null) {
577                 localeID = sdi.getLikelySubtags().get(parser.getLanguage());
578                 if (localeID == null) {
579                     throw new IllegalArgumentException(
580                         "A likely subtag for " + parser.getLanguage() +
581                         " is required to get its script.");
582                 }
583             }
584             script = parser.set(localeID).getScript();
585         }
586         if (script.equals("Latn")) {
587             return false;
588         }
589 
590         // Enforce checking of for other non-Latin scripts, for all currencies
591         // whose countries use that script, e.g. Russian should have Cyrillic
592         // currency symbols for modern currencies of countries with official
593         // languages whose script is Cyrillic (Bulgaria, Serbia, ...).
594         Set<String> currencies = getCurrenciesForScript(script);
595         return currencies != null && currencies.contains(currency);
596     }
597 
getCurrenciesForScript(String script)598     private Set<String> getCurrenciesForScript(String script) {
599         if (scriptToCurrencies != null) return scriptToCurrencies.get(script);
600 
601         // Get mapping of scripts to the territories that use that script in
602         // any of their primary languages.
603         Relation scriptToTerritories = new Relation(new HashMap<String, Set<String>>(), HashSet.class);
604         for (String lang : sdi.getBasicLanguageDataLanguages()) {
605             BasicLanguageData langData = sdi.getBasicLanguageDataMap(lang).get(Type.primary);
606             if (langData == null) {
607                 continue;
608             }
609             for (String curScript : langData.getScripts()) {
610                 scriptToTerritories.putAll(curScript, langData.getTerritories());
611             }
612         }
613 
614         // For each territory, get all of its legal tender currencies.
615         Date now = new Date(System.currentTimeMillis());
616         scriptToCurrencies = new Relation(new HashMap<String, Set<String>>(), HashSet.class);
617         for (Object curScript : scriptToTerritories.keySet()) {
618             Set<String> territories = scriptToTerritories.get(curScript);
619             Set<String> currencies = new HashSet<>();
620             for (String territory : territories) {
621                 Set<CurrencyDateInfo> currencyInfo = sdi.getCurrencyDateInfo(territory);
622                 for (CurrencyDateInfo info : currencyInfo) {
623                     if (info.isLegalTender() && info.getEnd().compareTo(now) > 0) {
624                         currencies.add(info.getCurrency());
625                     }
626                 }
627             }
628             scriptToCurrencies.putAll(curScript, currencies);
629         }
630         return scriptToCurrencies.get(script);
631     }
632 
633     /**
634      * Extracts just the text from a date field, replacing all the variable fields by variableReplacement. Return null
635      * if
636      * there is an error (a different test will find that error).
637      */
extractDatePatternText(String value, String variableReplacement, StringBuilder justText)638     public boolean extractDatePatternText(String value, String variableReplacement, StringBuilder justText) {
639         boolean haveText = false;
640         try {
641             formatParser.set(value);
642         } catch (Exception e) {
643             return false; // give up, it is illegal
644         }
645         boolean doReplacement = variableReplacement != null && variableReplacement.length() > 0;
646         justText.setLength(0);
647         for (Object item : formatParser.getItems()) {
648             if (item instanceof String) {
649                 justText.append(item);
650                 haveText = true;
651             } else {
652                 if (doReplacement) {
653                     justText.append(variableReplacement);
654                 }
655             }
656         }
657         return haveText;
658     }
659 
containsPart(String source, String... segments)660     public boolean containsPart(String source, String... segments) {
661         for (int i = 0; i < segments.length; ++i) {
662             if (source.indexOf(segments[i]) > 0) {
663                 return true;
664             }
665         }
666         return false;
667     }
668 
669     static final String TEST = "؉";
670 
addMissingMessage(UnicodeSet missing, CheckStatus.Type warningVsError, Subtype subtype, Subtype subtypeAscii, String qualifier, List<CheckStatus> result)671     private void addMissingMessage(UnicodeSet missing, CheckStatus.Type warningVsError, Subtype subtype,
672         Subtype subtypeAscii,
673         String qualifier, List<CheckStatus> result) {
674         String fixedMissing = prettyPrint.format(missing);
675         BitSet scripts = new BitSet();
676         for (String s : missing) {
677             final int script = UScript.getScript(s.codePointAt(0));
678             if (script == UScript.INHERITED || script == UScript.COMMON) {
679                 continue;
680             }
681             scripts.set(script);
682         }
683         StringBuilder scriptString = new StringBuilder();
684         if (!scripts.isEmpty()) {
685             scriptString.append("{");
686             for (int i = scripts.nextSetBit(0); i >= 0; i = scripts.nextSetBit(i + 1)) {
687                 if (scriptString.length() > 1) {
688                     scriptString.append(", ");
689                 }
690                 scriptString.append(UScript.getName(i));
691             }
692             scriptString.append("}");
693         }
694         result
695         .add(new CheckStatus()
696             .setCause(this)
697             .setMainType(warningVsError)
698             .setSubtype(ASCII.containsAll(missing) ? subtypeAscii : subtype)
699             .setMessage(
700                 "The characters \u200E{0}\u200E {1} {2}. "
701                     +
702                     "For what to do, see <i>Handling Warnings</i> in <a target='CLDR-ST-DOCS' href='http://cldr.org/translation/characters#TOC-Handing-Warnings'>Characters</a>.",
703                     new Object[] { fixedMissing, scriptString, qualifier }));
704     }
705 
706     static final Normalizer2 NFC = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
707 
708     /**
709      * Return null if ok, otherwise UnicodeSet of bad characters
710      *
711      * @param exemplarSet
712      * @param value
713      * @return
714      */
containsAllCountingParens(UnicodeSet exemplarSet, UnicodeSet exemplarSetPlusASCII, String value)715     private UnicodeSet containsAllCountingParens(UnicodeSet exemplarSet, UnicodeSet exemplarSetPlusASCII, String value) {
716         UnicodeSet result = null;
717         if (exemplarSet.containsAll(value)) {
718             return result;
719         }
720 
721         // Normalize
722         value = NFC.normalize(value);
723 
724         // if we failed, then check that everything outside of () is ok.
725         // and everything inside parens is either ASCII or in the set
726         int lastPos = 0;
727         while (true) {
728             int start = START_PAREN.findIn(value, lastPos, false);
729             String outside = value.substring(lastPos, start);
730             result = addDisallowedItems(exemplarSet, outside, result);
731             if (start == value.length()) {
732                 break; // all done
733             }
734             ++start;
735             int end = END_PAREN.findIn(value, start, false);
736             // don't worry about mixed brackets
737             String inside = value.substring(start, end);
738             result = addDisallowedItems(exemplarSetPlusASCII, inside, result);
739             if (end == value.length()) {
740                 break; // all done
741             }
742             lastPos = end + 1;
743         }
744         return result;
745     }
746 
addDisallowedItems(UnicodeSet exemplarSet, String outside, UnicodeSet result)747     private UnicodeSet addDisallowedItems(UnicodeSet exemplarSet, String outside, UnicodeSet result) {
748         if (!exemplarSet.containsAll(outside)) {
749             if (result == null) {
750                 result = new UnicodeSet();
751             }
752             result.addAll(new UnicodeSet().addAll(outside).removeAll(exemplarSet));
753         }
754         return result;
755     }
756 }
757