• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2005-2012, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.test;
8 
9 import java.util.BitSet;
10 import java.util.Date;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.LinkedHashSet;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 
20 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
21 import org.unicode.cldr.util.CLDRConfig;
22 import org.unicode.cldr.util.CLDRFile;
23 import org.unicode.cldr.util.CLDRFile.Status;
24 import org.unicode.cldr.util.Factory;
25 import org.unicode.cldr.util.InternalCldrException;
26 import org.unicode.cldr.util.LocaleIDParser;
27 import org.unicode.cldr.util.PatternCache;
28 import org.unicode.cldr.util.PatternPlaceholders;
29 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;
30 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
31 import org.unicode.cldr.util.SupplementalDataInfo;
32 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
33 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
34 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
35 import org.unicode.cldr.util.UnicodeSetPrettyPrinter;
36 import org.unicode.cldr.util.XMLSource;
37 import org.unicode.cldr.util.XPathParts;
38 
39 import com.google.common.base.Joiner;
40 import com.google.common.collect.Multiset;
41 import com.google.common.collect.Multiset.Entry;
42 import com.google.common.collect.TreeMultiset;
43 import com.ibm.icu.impl.Relation;
44 import com.ibm.icu.lang.UScript;
45 import com.ibm.icu.text.Collator;
46 import com.ibm.icu.text.DateTimePatternGenerator;
47 import com.ibm.icu.text.Normalizer2;
48 import com.ibm.icu.text.PluralRules;
49 import com.ibm.icu.text.Transform;
50 import com.ibm.icu.text.UnicodeSet;
51 import com.ibm.icu.text.PluralRules.PluralType;
52 import com.ibm.icu.util.ULocale;
53 
54 public class CheckForExemplars extends FactoryCheckCLDR {
55     private static final UnicodeSet RTL_CONTROLS = new UnicodeSet("[\\u061C\\u200E\\u200F\\u202A-\\u202D\\u2066-\\u2069]");
56 
57     private static final UnicodeSet RTL = new UnicodeSet("[[:bc=AL:][:bc=R:]]");
58 
59     private static final String STAND_IN = "#";
60 
61     // private final UnicodeSet commonAndInherited = new UnicodeSet(CheckExemplars.Allowed).complement();
62     // "[[:script=common:][:script=inherited:][:alphabetic=false:]]");
63     static String[] EXEMPLAR_SKIPS = {
64         "/currencySpacing",
65         "/exemplarCharacters",
66         // "/pattern",
67         "/localizedPatternChars",
68         "/segmentations",
69         "/references",
70         "/localeDisplayNames/variants/",
71         "/commonlyUsed",
72         "/defaultNumberingSystem",
73         "/otherNumberingSystems",
74         "/exponential",
75         "/nan",
76         "/scientificFormats",
77         "/inText",
78         "/orientation",
79         "/symbol[@alt=\"narrow\"]",
80         "/characters/parseLenients"
81     };
82 
83     static String[] DATE_PARTS = {
84         "/hourFormat",
85         "/dateFormatItem",
86         "/intervalFormatItem",
87         "/dateFormatLength",
88         "timeFormatLength"
89     };
90 
91     static final UnicodeSet START_PAREN = new UnicodeSet("[[:Ps:]]").freeze();
92     static final UnicodeSet END_PAREN = new UnicodeSet("[[:Pe:]]").freeze();
93     static final UnicodeSet ALL_CURRENCY_SYMBOLS = new UnicodeSet("[[:Sc:]]").freeze();
94     static final UnicodeSet LETTER = new UnicodeSet("[[A-Za-z]]").freeze();
95     static final UnicodeSet NUMBERS = new UnicodeSet("[[:N:]]").freeze();
96     static final UnicodeSet DISALLOWED_HOUR_FORMAT = new UnicodeSet("[[:letter:]]").remove('H').remove('m').freeze();
97     static final UnicodeSet DISALLOWED_IN_RANGE = new UnicodeSet("[:L:]").freeze();
98 
99     private UnicodeSet exemplars;
100     private UnicodeSet exemplarsPlusAscii;
101     //private static final UnicodeSet DISALLOWED_IN_scriptRegionExemplars = new UnicodeSet("[()();,;,]").freeze();
102     //private static final UnicodeSet DISALLOWED_IN_scriptRegionExemplarsWithParens = new UnicodeSet("[;,;,]").freeze();
103 
104     // Hack until cldrbug 6566 is fixed. TODO
105     private static final Pattern IGNORE_PLACEHOLDER_PARENTHESES = PatternCache.get("\\p{Ps}#\\p{Pe}");
106     // For the following: traditional placeholders just have {0}, {1}, {2}, ...
107     // But personName namePattern placeHolders start with [a-z], then continue with [0-9a-zA-Z-]+
108     // They need to be distinguished from non-placeholder patterns using {} in UnicodeSets
109     public static final Pattern PLACEHOLDER= PatternCache.get("\\{[0-9a-zA-Z-]+\\}");
110 
111 
112     // private UnicodeSet currencySymbolExemplars;
113     private boolean skip;
114     private Collator col;
115     private Collator spaceCol;
116     UnicodeSetPrettyPrinter prettyPrint;
117     private Status otherPathStatus = new Status();
118     private Matcher patternMatcher = PLACEHOLDER.matcher("");
119     private boolean errorDefaultOption;
120 
121     // for extracting date pattern text
122     private DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
123     StringBuilder justText = new StringBuilder();
124 
125     // public static final Pattern SUPPOSED_TO_BE_MESSAGE_FORMAT_PATTERN = PatternCache.get("/(" +
126     // "codePattern" +
127     // "|dateRangePattern" +
128     // "|dateTimeFormat[^/]*?/pattern" +
129     // "|appendItem" +
130     // "|intervalFormatFallback" +
131     // "|hoursFormat" +
132     // "|gmtFormat" +
133     // "|regionFormat" +
134     // "|fallbackFormat" +
135     // "|unitPattern.*@count=\"(zero|one|two|few|many|other)\"" +
136     // "|localePattern" +
137     // "|localeKeyTypePattern" +
138     // "|listPatternPart" +
139     // "|ellipsis" +
140     // "|monthPattern" +
141     // ")");
142     // private Matcher supposedToBeMessageFormat = SUPPOSED_TO_BE_MESSAGE_FORMAT_PATTERN.matcher("");
143 
144     public static final Pattern LEAD_OR_TRAIL_WHITESPACE_OK = PatternCache.get("/(" +
145         "references/reference" +
146         "|insertBetween" +
147         ")");
148     private Matcher leadOrTrailWhitespaceOk = LEAD_OR_TRAIL_WHITESPACE_OK.matcher("");
149 
150     private static UnicodeSet ASCII = new UnicodeSet("[\\u0020-\\u007F]").freeze();
151 
152     private PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance();
153     private SupplementalDataInfo sdi;
154     private Relation scriptToCurrencies;
155 
/**
 * Creates the check and stores the supplemental data obtained from
 * {@code SupplementalDataInfo.getInstance()} for later lookups.
 *
 * @param factory forwarded unchanged to the {@code FactoryCheckCLDR} constructor
 */
public CheckForExemplars(Factory factory) {
    super(factory);
    this.sdi = SupplementalDataInfo.getInstance();
}
162 
163     /**
164      * Adapted from GenerateXMB.MapTransform
165      *
166      * @author jchye
167      *
168      */
169     static class PlaceholderTransform implements Transform<String, Set<String>> {
170         @Override
transform(String source)171         public Set<String> transform(String source) {
172             Set<String> placeholders = new LinkedHashSet<>();
173             String[] parts = source.split(";\\s+");
174             for (String part : parts) {
175                 int equalsPos = part.indexOf('=');
176                 String placeholder = part.substring(0, equalsPos).trim();
177                 placeholders.add(placeholder);
178             }
179             return placeholders;
180         }
181     }
182 
183     @Override
setCldrFileToCheck(CLDRFile cldrFile, Options options, List<CheckStatus> possibleErrors)184     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFile, Options options, List<CheckStatus> possibleErrors) {
185         if (cldrFile == null) return this;
186         skip = true;
187         super.setCldrFileToCheck(cldrFile, options, possibleErrors);
188         if (cldrFile.getLocaleID().equals("root")) {
189             return this;
190         }
191 
192         errorDefaultOption = options.get(Options.Option.exemplarErrors) != null;
193 
194         String locale = cldrFile.getLocaleID();
195         col = Collator.getInstance(new ULocale(locale));
196         spaceCol = Collator.getInstance(new ULocale(locale));
197         spaceCol.setStrength(Collator.PRIMARY);
198 
199         CLDRFile resolvedFile = getResolvedCldrFileToCheck();
200         boolean[] ok = new boolean[1];
201         exemplars = safeGetExemplars("", possibleErrors, resolvedFile, ok);
202 
203         if (exemplars == null) {
204             CheckStatus item = new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
205                 .setSubtype(Subtype.noExemplarCharacters)
206                 .setMessage("No Exemplar Characters: {0}", new Object[] { this.getClass().getName() });
207             possibleErrors.add(item);
208             return this;
209         } else if (!ok[0]) {
210             exemplars = new UnicodeSet();
211         } else {
212             exemplars = new UnicodeSet(exemplars); // modifiable copy
213         }
214 
215         boolean isRTL = RTL.containsSome(exemplars);
216         if (isRTL) {
217             exemplars.addAll(RTL_CONTROLS);
218         }
219         // UnicodeSet temp = resolvedFile.getExemplarSet("standard");
220         // if (temp != null) exemplars.addAll(temp);
221         UnicodeSet auxiliary = safeGetExemplars("auxiliary", possibleErrors, resolvedFile, ok); // resolvedFile.getExemplarSet("auxiliary",
222         // CLDRFile.WinningChoice.WINNING);
223         if (auxiliary != null) {
224             exemplars.addAll(auxiliary);
225         }
226 
227         if (CheckExemplars.USE_PUNCTUATION) {
228             UnicodeSet punctuation = safeGetExemplars("punctuation", possibleErrors, resolvedFile, ok); // resolvedFile.getExemplarSet("auxiliary",
229             if (punctuation != null) {
230                 exemplars.addAll(punctuation);
231             }
232 
233             UnicodeSet numbers = getNumberSystemExemplars();
234             exemplars.addAll(numbers);
235 
236             // TODO fix replacement character
237             exemplars.add(STAND_IN);
238         }
239 
240         exemplars.addAll(CheckExemplars.AlwaysOK).freeze();
241         exemplarsPlusAscii = new UnicodeSet(exemplars).addAll(ASCII).freeze();
242 
243         skip = false;
244         prettyPrint = new UnicodeSetPrettyPrinter()
245             .setOrdering(col != null ? col : Collator.getInstance(ULocale.ROOT))
246             .setSpaceComparator(col != null ? col : Collator.getInstance(ULocale.ROOT)
247                 .setStrength2(Collator.PRIMARY))
248             .setCompressRanges(true);
249         return this;
250     }
251 
getNumberSystemExemplars()252     private UnicodeSet getNumberSystemExemplars() {
253         String numberSystem = getCldrFileToCheck().getStringValue("//ldml/numbers/defaultNumberingSystem");
254         String digits = sdi.getDigits(numberSystem);
255         return new UnicodeSet().addAll(digits);
256     }
257 
safeGetExemplars(String type, List<CheckStatus> possibleErrors, CLDRFile resolvedFile, boolean[] ok)258     private UnicodeSet safeGetExemplars(String type, List<CheckStatus> possibleErrors, CLDRFile resolvedFile,
259         boolean[] ok) {
260         UnicodeSet result = null;
261         try {
262             result = resolvedFile.getExemplarSet(type, CLDRFile.WinningChoice.WINNING);
263             ok[0] = true;
264         } catch (IllegalArgumentException iae) {
265             possibleErrors.add(new CheckStatus()
266                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.couldNotAccessExemplars)
267                 .setMessage("Could not get exemplar set: " + iae.toString()));
268             ok[0] = false;
269         }
270         return result;
271     }
272 
273     @Override
handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)274     public CheckCLDR handleCheck(String path, String fullPath, String value,
275         Options options, List<CheckStatus> result) {
276         if (fullPath == null) return this; // skip paths that we don't have
277         if (value == null) return this; // skip values that we don't have ?
278         if (skip) return this;
279         if (path == null) {
280             throw new InternalCldrException("Empty path!");
281         } else if (getCldrFileToCheck() == null) {
282             throw new InternalCldrException("no file to check!");
283         }
284         String sourceLocale = getResolvedCldrFileToCheck().getSourceLocaleID(path, otherPathStatus);
285 
286         // if we are an alias to another path, then skip
287         // if (!path.equals(otherPathStatus.pathWhereFound)) {
288         // return this;
289         // }
290 
291         // now check locale source
292         if (XMLSource.CODE_FALLBACK_ID.equals(sourceLocale)) {
293             return this;
294             // } else if ("root".equals(sourceLocale)) {
295             // // skip eras for non-gregorian
296             // if (true) return this;
297             // if (path.indexOf("/calendar") >= 0 && path.indexOf("gregorian") <= 0) return this;
298         }
299 
300         if (containsPart(path, EXEMPLAR_SKIPS)) {
301             return this;
302         }
303 
304         CheckStatus.Type errorOption = errorDefaultOption & sourceLocale.equals(getResolvedCldrFileToCheck().getLocaleID())
305             ? CheckStatus.errorType : CheckStatus.warningType;
306 
307         value = checkAndReplacePlaceholders(path, value, result);
308         if (path.startsWith("//ldml/numbers/miscPatterns") && path.contains("[@type=\"range\"]")) {
309             if (DISALLOWED_IN_RANGE.containsSome(value)) {
310                 result
311                 .add(new CheckStatus()
312                     .setCause(this)
313                     .setMainType(CheckStatus.errorType)
314                     .setSubtype(Subtype.illegalCharactersInPattern)
315                     .setMessage(
316                         "Range patterns should not have letters.",
317                         new Object[] {}));
318             }
319         }
320         // Now handle date patterns.
321         if (containsPart(path, DATE_PARTS)) {
322             if (!extractDatePatternText(value, STAND_IN, justText)) {
323                 return this; // we are done, no text.
324             }
325             value = justText.toString();
326             if (NUMBERS.containsSome(value)) {
327                 UnicodeSet disallowed = new UnicodeSet().addAll(value).retainAll(NUMBERS);
328                 addMissingMessage(disallowed, CheckStatus.errorType,
329                     Subtype.patternCannotContainDigits,
330                     Subtype.patternCannotContainDigits,
331                     "cannot occur in date or time patterns", result);
332             }
333             if (path.endsWith("/hourFormat")) {
334                 UnicodeSet disallowed = new UnicodeSet().addAll(value)
335                     .retainAll(DISALLOWED_HOUR_FORMAT);
336                 if (!disallowed.isEmpty()) {
337                     addMissingMessage(disallowed, CheckStatus.errorType,
338                         Subtype.patternContainsInvalidCharacters,
339                         Subtype.patternContainsInvalidCharacters,
340                         "cannot occur in the hour format", result);
341                 }
342             }
343         }
344 
345         if (path.startsWith("//ldml/posix/messages")) return this;
346 
347         UnicodeSet disallowed;
348 
349         if (path.contains("/currency") && path.contains("/symbol")) {
350             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
351                 disallowed.removeAll(ALL_CURRENCY_SYMBOLS);
352                 disallowed.removeAll(LETTER); // Allow ASCII A-Z in currency symbols
353                 if (disallowed.size() > 0) {
354                     // && asciiNotAllowed(getCldrFileToCheck().getLocaleID(), currency)) {
355                     addMissingMessage(disallowed, errorOption,
356                         Subtype.charactersNotInMainOrAuxiliaryExemplars,
357                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters",
358                         result);
359                 }
360             }
361         } else if (path.contains("/gmtFormat") || path.contains("/gmtZeroFormat")) {
362             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
363                 disallowed.removeAll(LETTER); // Allow ASCII A-Z in gmtFormat and gmtZeroFormat
364                 if (disallowed.size() > 0) {
365                     addMissingMessage(disallowed, errorOption,
366                         Subtype.charactersNotInMainOrAuxiliaryExemplars,
367                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters",
368                         result);
369                 }
370             }
371         } else if (path.contains("/months") || path.contains("/quarters")) {
372             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
373                 disallowed.removeAll("IVXivx"); // Allow Roman-numeral letters in month or quarter names
374                 if (path.contains("/calendar[@type=\"generic\"]/months")) {
375                     disallowed.removeAll("M"); // Generic-calendar month names contain 'M' and do not get modified
376                 }
377                 if (disallowed.size() > 0) {
378                     addMissingMessage(disallowed, errorOption,
379                         Subtype.charactersNotInMainOrAuxiliaryExemplars,
380                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters",
381                         result);
382                 }
383             }
384         } else if (path.contains("/localeDisplayNames") && !path.contains("/localeDisplayPattern")) {
385             // test first for outside of the set.
386             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
387                 if (path.contains("[@type=\"iso8601\"]")) {
388                     disallowed.removeAll("ISO"); // Name of ISO8601 calendar may contain "ISO" regardless of native script
389                 }
390                 if (disallowed.size() > 0) {
391                     addMissingMessage(disallowed, errorOption, Subtype.charactersNotInMainOrAuxiliaryExemplars,
392                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
393                 }
394             }
395             if (path.contains("/codePatterns")) {
396                 disallowed = new UnicodeSet().addAll(value).retainAll(NUMBERS);
397                 if (!disallowed.isEmpty()) {
398                     addMissingMessage(disallowed, CheckStatus.errorType,
399                         Subtype.patternCannotContainDigits,
400                         Subtype.patternCannotContainDigits,
401                         "cannot occur in locale fields", result);
402                 }
403             }
404         } else if (path.contains("/units")) {
405             String noValidParentheses = IGNORE_PLACEHOLDER_PARENTHESES.matcher(value).replaceAll("");
406             disallowed = new UnicodeSet().addAll(START_PAREN).addAll(END_PAREN)
407                 .retainAll(noValidParentheses);
408             if (!disallowed.isEmpty()) {
409                 addMissingMessage(disallowed, CheckStatus.errorType,
410                     Subtype.parenthesesNotAllowed,
411                     Subtype.parenthesesNotAllowed,
412                     "cannot occur in units", result);
413             }
414         } else if (path.endsWith("/exemplarCity")) {
415             disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
416             if (disallowed != null) {
417                 if ("root".equals(sourceLocale)) {
418                     return this;
419                 }
420                 // Get script of locale.
421                 LocaleIDParser parser = new LocaleIDParser().set(sourceLocale);
422                 String script = parser.getScript();
423                 if (script.length() == 0) {
424                     String localeID = sdi.getLikelySubtags().get(sourceLocale);
425                     if (localeID == null) {
426                         localeID = sdi.getLikelySubtags().get(parser.getLanguage());
427                         if (localeID == null) {
428                             throw new IllegalArgumentException(
429                                 "A likely subtag for " + parser.getLanguage() +
430                                 " is required to get its script.");
431                         }
432                     }
433                     script = parser.set(localeID).getScript();
434                 }
435                 int myscript = UScript.getCodeFromName(script);
436                 UnicodeSet toRemove = new UnicodeSet();
437                 for (int i = 0; i < disallowed.size(); i++) {
438                     int c = disallowed.charAt(i);
439                     if (UScript.getScript(c) == myscript) {
440                         toRemove.add(c);
441                     }
442                 }
443                 disallowed.removeAll(toRemove);
444                 if (disallowed.size() > 0) {
445                     addMissingMessage(disallowed, errorOption, Subtype.charactersNotInMainOrAuxiliaryExemplars,
446                         Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
447                 }
448             }
449         } else if (path.contains("/annotations") && !path.contains("[@type")) {
450             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
451                 addMissingMessage(disallowed, CheckStatus.warningType, Subtype.charactersNotInMainOrAuxiliaryExemplars,
452                     Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
453             }
454         } else {
455             if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
456                 addMissingMessage(disallowed, errorOption, Subtype.charactersNotInMainOrAuxiliaryExemplars,
457                     Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
458             }
459         }
460 
461         // check for spaces
462 
463         if (!value.equals(value.trim())  && !path.contains("/foreignSpaceReplacement")) { // foreignSpaceReplacement value can be just space
464             if (!leadOrTrailWhitespaceOk.reset(path).find()) {
465                 result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
466                     .setSubtype(Subtype.mustNotStartOrEndWithSpace)
467                     .setMessage("This item must not start or end with whitespace, or be empty."));
468             }
469         }
470         // if (value.contains("  ")) {
471         // result.add(new
472         // CheckStatus().setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.mustNotStartOrEndWithSpace)
473         // .setMessage("This item must not contain two space characters in a row."));
474         // }
475         return this;
476     }
477 
checkAndReplacePlaceholders(String path, String value, List<CheckStatus> result)478     private String checkAndReplacePlaceholders(String path, String value, List<CheckStatus> result) {
479         CheckStatus.Type statusType = getPhase() == Phase.BUILD ? CheckStatus.warningType : CheckStatus.errorType; // new errors, so get past the tests.
480 
481         // Get information about what should be there
482         PlaceholderStatus placeholderStatus = patternPlaceholders.getStatus(path);
483         Map<String, PlaceholderInfo> placeholderInfo = patternPlaceholders.get(path);
484 
485         int minimum = placeholderInfo.size();
486         int maximum = placeholderInfo.size();
487 
488         if (placeholderStatus == PlaceholderStatus.LOCALE_DEPENDENT || placeholderStatus == PlaceholderStatus.MULTIPLE) {
489             // if locale dependent, it is because of count= or ordinal=. Figure out what the values are, and whether we are allowed to have none or one
490             XPathParts parts = XPathParts.getFrozenInstance(path);
491             PluralRules.PluralType ptype = PluralType.CARDINAL;
492             String keyword = parts.getAttributeValue(-1, "count");
493             if (keyword == null) {
494                 keyword = parts.getAttributeValue(-1, "ordinal");
495                 ptype = PluralType.ORDINAL;
496             }
497             SupplementalDataInfo sdi = CLDRConfig.getInstance().getSupplementalDataInfo();
498             PluralRules rules =  sdi.getPluralRules(new ULocale(getCldrFileToCheck().getLocaleID()), ptype);
499             if (rules != null) {
500                 try {
501                     if (rules.getUniqueKeywordValue(keyword) != PluralRules.NO_UNIQUE_VALUE) {
502                         minimum = 0;
503                     }
504                 } catch (Exception e) {
505                     // internal error, skip
506                 }
507             }
508         } else if (placeholderStatus == PlaceholderStatus.OPTIONAL) {
509             minimum = 1;
510         }
511 
512         // TODO: move these tests to CheckPlaceholder
513 
514         // Now see what is there, and see if they match
515         Matcher matcher = patternMatcher.reset(value);
516         Multiset<String> matchList = TreeMultiset.create(); // Look for duplicate values.
517         while (matcher.find()) {
518             matchList.add(matcher.group());
519         }
520         final Set<String> distinctPlaceholders = matchList.elementSet();
521         int countDistinctPlaceholders = distinctPlaceholders.size();
522 
523         if (countDistinctPlaceholders > 0 && placeholderStatus != PlaceholderStatus.OPTIONAL ) {
524             // Verify that all placeholders are monotonically increasing from zero.
525             int expected = 0;
526             for (String element : distinctPlaceholders) {
527                 // int elementValue = Integer.parseInt(element, 1, element.length()-1, 10);
528                 int elementValue = Integer.parseInt(element.substring(1, element.length()-1), 10);
529                 if (elementValue != expected) {
530                     result.add(new CheckStatus().setCause(this).setMainType(statusType)
531                         .setSubtype(Subtype.gapsInPlaceholderNumbers)
532                         .setMessage("Placeholders {0} should be strictly increasing, starting at zero.", distinctPlaceholders));
533                     break;
534                 }
535                 ++expected;
536             }
537         }
538 
539         // Check if duplicates are allowed
540         if (matchList.size() > countDistinctPlaceholders && placeholderStatus != PlaceholderStatus.MULTIPLE) {
541             Set<String> errors = new LinkedHashSet<>();
542             for (Entry<String> entry : matchList.entrySet()) {
543                 if (entry.getCount() > 1) {
544                     errors.add(entry.getElement());
545                 }
546             }
547             result.add(new CheckStatus().setCause(this).setMainType(statusType)
548                 .setSubtype(Subtype.duplicatePlaceholders)
549                 .setMessage("Duplicate placeholders: {0}.", Joiner.on(", ").join(errors)));
550         }
551 
552         // Now see if the number we have is within bounds
553 
554         if (countDistinctPlaceholders < minimum) {
555             result.add(new CheckStatus().setCause(this).setMainType(statusType)
556                 .setSubtype(Subtype.missingPlaceholders)
557                 .setMessage("Need at least {0} placeholder(s), but only have {1}. Placeholders are: {2}", minimum, countDistinctPlaceholders, placeholderInfo));
558         } else {
559             if (countDistinctPlaceholders > maximum) {
560                 result.add(new CheckStatus().setCause(this).setMainType(statusType)
561                     .setSubtype(Subtype.extraPlaceholders)
562                     .setMessage("Need no more than {0} placeholders, but have too many with {1}.", countDistinctPlaceholders, minimum));
563             }
564         }
565         // Return the pattern with placeholders replaced
566         return matchList.isEmpty() ? value : patternMatcher.replaceAll(STAND_IN);
567     }
568 
569     /**
570      * Checks if ASCII characters are allowed in a currency symbol in the specified locale.
571      * @param localeID the locale ID that the currency is in
572      * @param currency the currency to be checked
573      * @return true if ASCII is not allowed
574      */
asciiNotAllowed(String localeID, String currency)575     private boolean asciiNotAllowed(String localeID, String currency) {
576         // Don't allow ascii at all for bidi scripts.
577         String charOrientation = getResolvedCldrFileToCheck().getStringValue(
578             "//ldml/layout/orientation/characterOrder");
579         if (charOrientation.equals("right-to-left")) {
580             return true;
581         }
582 
583         // Get script of locale. if Latn, quit.
584         LocaleIDParser parser = new LocaleIDParser().set(localeID);
585         String script = parser.getScript();
586         if (script.length() == 0) {
587             localeID = sdi.getLikelySubtags().get(localeID);
588             if (localeID == null) {
589                 localeID = sdi.getLikelySubtags().get(parser.getLanguage());
590                 if (localeID == null) {
591                     throw new IllegalArgumentException(
592                         "A likely subtag for " + parser.getLanguage() +
593                         " is required to get its script.");
594                 }
595             }
596             script = parser.set(localeID).getScript();
597         }
598         if (script.equals("Latn")) {
599             return false;
600         }
601 
602         // Enforce checking of for other non-Latin scripts, for all currencies
603         // whose countries use that script, e.g. Russian should have Cyrillic
604         // currency symbols for modern currencies of countries with official
605         // languages whose script is Cyrillic (Bulgaria, Serbia, ...).
606         Set<String> currencies = getCurrenciesForScript(script);
607         return currencies != null && currencies.contains(currency);
608     }
609 
getCurrenciesForScript(String script)610     private Set<String> getCurrenciesForScript(String script) {
611         if (scriptToCurrencies != null) return scriptToCurrencies.get(script);
612 
613         // Get mapping of scripts to the territories that use that script in
614         // any of their primary languages.
615         Relation scriptToTerritories = new Relation(new HashMap<String, Set<String>>(), HashSet.class);
616         for (String lang : sdi.getBasicLanguageDataLanguages()) {
617             BasicLanguageData langData = sdi.getBasicLanguageDataMap(lang).get(Type.primary);
618             if (langData == null) {
619                 continue;
620             }
621             for (String curScript : langData.getScripts()) {
622                 scriptToTerritories.putAll(curScript, langData.getTerritories());
623             }
624         }
625 
626         // For each territory, get all of its legal tender currencies.
627         Date now = new Date(System.currentTimeMillis());
628         scriptToCurrencies = new Relation(new HashMap<String, Set<String>>(), HashSet.class);
629         for (Object curScript : scriptToTerritories.keySet()) {
630             Set<String> territories = scriptToTerritories.get(curScript);
631             Set<String> currencies = new HashSet<>();
632             for (String territory : territories) {
633                 Set<CurrencyDateInfo> currencyInfo = sdi.getCurrencyDateInfo(territory);
634                 for (CurrencyDateInfo info : currencyInfo) {
635                     if (info.isLegalTender() && info.getEnd().compareTo(now) > 0) {
636                         currencies.add(info.getCurrency());
637                     }
638                 }
639             }
640             scriptToCurrencies.putAll(curScript, currencies);
641         }
642         return scriptToCurrencies.get(script);
643     }
644 
645     /**
646      * Extracts just the text from a date field, replacing all the variable fields by variableReplacement. Return null
647      * if
648      * there is an error (a different test will find that error).
649      */
extractDatePatternText(String value, String variableReplacement, StringBuilder justText)650     public boolean extractDatePatternText(String value, String variableReplacement, StringBuilder justText) {
651         boolean haveText = false;
652         try {
653             formatParser.set(value);
654         } catch (Exception e) {
655             return false; // give up, it is illegal
656         }
657         boolean doReplacement = variableReplacement != null && variableReplacement.length() > 0;
658         justText.setLength(0);
659         for (Object item : formatParser.getItems()) {
660             if (item instanceof String) {
661                 justText.append(item);
662                 haveText = true;
663             } else {
664                 if (doReplacement) {
665                     justText.append(variableReplacement);
666                 }
667             }
668         }
669         return haveText;
670     }
671 
containsPart(String source, String... segments)672     public boolean containsPart(String source, String... segments) {
673         for (int i = 0; i < segments.length; ++i) {
674             if (source.indexOf(segments[i]) > 0) {
675                 return true;
676             }
677         }
678         return false;
679     }
680 
681     static final String TEST = "؉";
682 
addMissingMessage(UnicodeSet missing, CheckStatus.Type warningVsError, Subtype subtype, Subtype subtypeAscii, String qualifier, List<CheckStatus> result)683     private void addMissingMessage(UnicodeSet missing, CheckStatus.Type warningVsError, Subtype subtype,
684         Subtype subtypeAscii,
685         String qualifier, List<CheckStatus> result) {
686         String fixedMissing = prettyPrint.format(missing);
687         BitSet scripts = new BitSet();
688         for (String s : missing) {
689             final int script = UScript.getScript(s.codePointAt(0));
690             if (script == UScript.INHERITED || script == UScript.COMMON) {
691                 continue;
692             }
693             scripts.set(script);
694         }
695         StringBuilder scriptString = new StringBuilder();
696         if (!scripts.isEmpty()) {
697             scriptString.append("{");
698             for (int i = scripts.nextSetBit(0); i >= 0; i = scripts.nextSetBit(i + 1)) {
699                 if (scriptString.length() > 1) {
700                     scriptString.append(", ");
701                 }
702                 scriptString.append(UScript.getName(i));
703             }
704             scriptString.append("}");
705         }
706         final String helpUrl = "http://cldr.unicode.org/translation/-core-data/exemplars#TOC-Handling-Warnings-in-Exemplar-characters";
707         final String message = "The characters \u200E{0}\u200E {1} {2}. "
708             + "For what to do, see <i>Handling Warnings</i> in <a target='CLDR-ST-DOCS' href='"
709             + helpUrl
710             + "'>Exemplar Characters</a>.";
711         result.add(new CheckStatus()
712             .setCause(this)
713             .setMainType(warningVsError)
714             .setSubtype(ASCII.containsAll(missing) ? subtypeAscii : subtype)
715             .setMessage(message, new Object[] { fixedMissing, scriptString, qualifier }));
716     }
717 
718     static final Normalizer2 NFC = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
719 
720     /**
721      * Return null if ok, otherwise UnicodeSet of bad characters
722      *
723      * @param exemplarSet
724      * @param value
725      * @return
726      */
containsAllCountingParens(UnicodeSet exemplarSet, UnicodeSet exemplarSetPlusASCII, String value)727     private UnicodeSet containsAllCountingParens(UnicodeSet exemplarSet, UnicodeSet exemplarSetPlusASCII, String value) {
728         UnicodeSet result = null;
729         if (exemplarSet.containsAll(value)) {
730             return result;
731         }
732 
733         // Normalize
734         value = NFC.normalize(value);
735 
736         // if we failed, then check that everything outside of () is ok.
737         // and everything inside parens is either ASCII or in the set
738         int lastPos = 0;
739         while (true) {
740             int start = START_PAREN.findIn(value, lastPos, false);
741             String outside = value.substring(lastPos, start);
742             result = addDisallowedItems(exemplarSet, outside, result);
743             if (start == value.length()) {
744                 break; // all done
745             }
746             ++start;
747             int end = END_PAREN.findIn(value, start, false);
748             // don't worry about mixed brackets
749             String inside = value.substring(start, end);
750             result = addDisallowedItems(exemplarSetPlusASCII, inside, result);
751             if (end == value.length()) {
752                 break; // all done
753             }
754             lastPos = end + 1;
755         }
756         return result;
757     }
758 
addDisallowedItems(UnicodeSet exemplarSet, String outside, UnicodeSet result)759     private UnicodeSet addDisallowedItems(UnicodeSet exemplarSet, String outside, UnicodeSet result) {
760         if (!exemplarSet.containsAll(outside)) {
761             if (result == null) {
762                 result = new UnicodeSet();
763             }
764             result.addAll(new UnicodeSet().addAll(outside).removeAll(exemplarSet));
765         }
766         return result;
767     }
768 }
769