• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.test;
2 
3 import java.util.List;
4 import java.util.Map;
5 import java.util.Map.Entry;
6 import java.util.Set;
7 
8 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
9 import org.unicode.cldr.util.CLDRFile;
10 import org.unicode.cldr.util.CLDRFile.Status;
11 import org.unicode.cldr.util.CldrUtility;
12 import org.unicode.cldr.util.Factory;
13 import org.unicode.cldr.util.LanguageTagParser;
14 import org.unicode.cldr.util.RegexLookup;
15 import org.unicode.cldr.util.XPathParts;
16 
17 import com.google.common.collect.ImmutableSet;
18 import com.ibm.icu.lang.CharSequences;
19 import com.ibm.icu.text.UnicodeSet;
20 import com.ibm.icu.util.ICUException;
21 
22 public class CheckForCopy extends FactoryCheckCLDR {
23 
24     private static final boolean DEBUG = CldrUtility.getProperty("DEBUG", false);
25 
CheckForCopy(Factory factory)26     public CheckForCopy(Factory factory) {
27         super(factory);
28     }
29 
30     private static final RegexLookup<Boolean> skip = new RegexLookup<Boolean>()
31         .add("/(availableFormats" +
32             "|exponential" +
33             "|nan" +
34             "|availableFormats" +
35             "|intervalFormatItem" +
36             "|exemplarCharacters\\[@type=\"(currencySymbol|index)\"]" +
37             "|scientificFormat" +
38             "|timeZoneNames/(hourFormat|gmtFormat|gmtZeroFormat)" +
39             "|dayPeriod" +
40             "|(monthWidth|dayWidth|quarterWidth)\\[@type=\"(narrow|abbreviated)\"]" +
41             "|exemplarCity" +
42             // "|localeDisplayNames/(scripts|territories)" +
43             "|currency\\[@type=\"[A-Z]+\"]/symbol" +
44             "|pattern" +
45             "|field\\[@type=\"dayperiod\"]" +
46             "|defaultNumberingSystem" +
47             "|otherNumberingSystems" +
48             "|exemplarCharacters" +
49             "|durationUnitPattern" +
50             "|coordinateUnitPattern" +
51             "|unitLength\\[@type=\"(short|narrow)\"\\]/unit\\[@type=\"[^\"]++\"\\]/unitPattern\\[@count=\"[^\"]++\"\\]" +
52             "|unitLength\\[@type=\"(short|narrow)\"\\]/unit\\[@type=\"[^\"]++\"\\]/perUnitPattern" +
53             ")", true)
54         .add("^//ldml/dates/calendars/calendar\\[@type=\"gregorian\"]", false)
55         .add("^//ldml/dates/calendars/calendar", true);
56 
57     private static final RegexLookup<Boolean> SKIP_CODE_CHECK = new RegexLookup<Boolean>()
58         .add("^//ldml/characterLabels/characterLabel", true)
59         .add("^//ldml/dates/fields/field\\[@type=\"(era|week|minute|quarter)\"]/displayName", true)
60         .add("^//ldml/localeDisplayNames/scripts/script\\[@type=\"(Jamo|Thai|Ahom|Loma|Moon|Newa)\"]", true)
61         .add("^//ldml/localeDisplayNames/languages/language\\[@type=\"(fon|gan|luo|tiv|yao|vai)\"]", true)
62         .add("^//ldml/dates/timeZoneNames/metazone\\[@type=\"GMT\"]", true)
63         .add("^//ldml/localeDisplayNames/territories/territory\\[@type=\"[^\"]*+\"]\\[@alt=\"short\"]", true)
64         .add("^//ldml/localeDisplayNames/measurementSystemNames/measurementSystemName", true)
65         .add("^//ldml/localeDisplayNames/types/type\\[@key=\"collation\"]\\[@type=\"standard\"]", true)
66         ;
67 
68     private static final Set<String> SKIP_TYPES = ImmutableSet.of(
69         "CHF", "EUR", "XPD",
70         "Vaii", "Yiii", "Thai",
71         "SAAHO", "BOONT", "SCOUSE",
72         "fon", "ijo", "luo", "tiv", "yao", "zu", "zza", "tw", "ur", "vo", "ha", "hi", "ig", "yo", "ak", "vai",
73         "eo", "af",
74         "Cuba",
75         // languages that are the same in English as in themselves
76         // and countries that have the same name as English in one of their official languages.
77         "af", // Afrikaans
78         "ak", // Akan
79         "AD", // Andorra
80         "LI", // Liechtenstein
81         "NA", // Namibia
82         "AR", // Argentina
83         "CO", // Colombia
84         "VE", // Venezuela
85         "CL", // Chile
86         "CU", // Cuba
87         "EC", // Ecuador
88         "GT", // Guatemala
89         "BO", // Bolivia
90         "HN", // Honduras
91         "SV", // El Salvador
92         "CR", // Costa Rica
93         "PR", // Puerto Rico
94         "NI", // Nicaragua
95         "UY", // Uruguay
96         "PY", // Paraguay
97         "fil", // Filipino
98         "FR", // France
99         "MG", // Madagascar
100         "CA", // Canada
101         "CI", // Côte d’Ivoire
102         "BI", // Burundi
103         "ML", // Mali
104         "TG", // Togo
105         "NE", // Niger
106         "BF", // Burkina Faso
107         "RE", // Réunion
108         "GA", // Gabon
109         "LU", // Luxembourg
110         "MQ", // Martinique
111         "GP", // Guadeloupe
112         "YT", // Mayotte
113         "VU", // Vanuatu
114         "SC", // Seychelles
115         "MC", // Monaco
116         "DJ", // Djibouti
117         "RW", // Rwanda
118         "ha", // Hausa
119         "ID", // Indonesia
120         "ig", // Igbo
121         "NG", // Nigeria
122         "SM", // San Marino
123         "kln", // Kalenjin
124         "mg", // Malagasy
125         "MY", // Malaysia
126         "BN", // Brunei
127         "MT", // Malta
128         "ZW", // Zimbabwe
129         "SR", // Suriname
130         "AW", // Aruba
131         "PT", // Portugal
132         "AO", // Angola
133         "TL", // Timor-Leste
134         "RS", // Serbia
135         "rw", // Kinyarwanda
136         "RW", // Rwanda
137         "ZW", // Zimbabwe
138         "FI", // Finland
139         "TZ", // Tanzania
140         "KE", // Kenya
141         "UG", // Uganda
142         "TO", // Tonga
143         "wae", // Walser
144         "metric");
145 
146     static UnicodeSet ASCII_LETTER = new UnicodeSet("[a-zA-Z]");
147 
148     enum Failure {
149         ok, same_as_english, same_as_code
150     }
151 
handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)152     public CheckCLDR handleCheck(String path, String fullPath, String value,
153         Options options, List<CheckStatus> result) {
154 
155         if (fullPath == null || value == null) return this; // skip paths that we don't have
156         if (value.contentEquals("Hanb")) {
157             int debug = 0;
158         }
159 
160         Status status = new Status();
161 
162         String loc = getCldrFileToCheck().getSourceLocaleID(path, status);
163         if (!getCldrFileToCheck().getLocaleID().equals(loc) || !path.equals(status.pathWhereFound)) {
164             String topStringValue = getCldrFileToCheck().getUnresolved().getStringValue(path);
165             if (!CldrUtility.INHERITANCE_MARKER.equals(topStringValue)) {
166                 return this;
167             }
168         }
169 
170         if (Boolean.TRUE == skip.get(path)) {
171             return this;
172         }
173 
174         Failure failure = Failure.ok;
175 
176         String english = getDisplayInformation().getStringValue(path);
177         if (CharSequences.equals(english, value)) {
178             if (ASCII_LETTER.containsSome(english)) {
179                 failure = Failure.same_as_english;
180             }
181         }
182 
183         // Check for attributes.
184         // May override English test
185         if (Boolean.TRUE != SKIP_CODE_CHECK.get(path)) {
186             XPathParts parts = XPathParts.getFrozenInstance(path);
187 
188             int elementCount = parts.size();
189             for (int i = 2; i < elementCount; ++i) {
190                 Map<String, String> attributes = parts.getAttributes(i);
191                 for (Entry<String, String> attributeEntry : attributes.entrySet()) {
192                     final String attributeValue = attributeEntry.getValue();
193                     //                    if (SKIP_TYPES.contains(attributeValue)) {
194                     //                        failure = Failure.ok; // override English test
195                     //                        break;
196                     //                    }
197                     try {
198                         if (value.equals(attributeValue)) {
199                             failure = Failure.same_as_code;
200                             break;
201                         }
202                     } catch (NullPointerException e) {
203                         throw new ICUException("Value: " + value + "\nattributeValue: " + attributeValue
204                             + "\nPath: " + path, e);
205                     }
206                 }
207             }
208         }
209 
210         switch (failure) {
211         case same_as_english:
212             result
213             .add(new CheckStatus()
214                 .setCause(this)
215                 .setMainType(CheckStatus.warningType)
216                 .setSubtype(Subtype.sameAsEnglish)
217                 .setCheckOnSubmit(false)
218                 .setMessage(
219                     "The value is the same as in English: see <a target='CLDR-ST-DOCS' href='http://cldr.org/translation/fixing-errors'>Fixing Errors and Warnings</a>.",
220                     new Object[] {}));
221             break;
222         case same_as_code:
223             result
224             .add(new CheckStatus()
225                 .setCause(this)
226                 .setMainType(CheckStatus.errorType)
227                 .setSubtype(Subtype.sameAsCode)
228                 .setCheckOnSubmit(false)
229                 .setMessage(
230                     "The value is the same as the 'code': see <a target='CLDR-ST-DOCS' href='http://cldr.org/translation/fixing-errors'>Fixing Errors and Warnings</a>.",
231                     new Object[] {}));
232             break;
233         default:
234         }
235         return this;
236     }
237 
238     @Override
setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)239     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
240         List<CheckStatus> possibleErrors) {
241         if (cldrFileToCheck == null) return this;
242 
243         final String localeID = cldrFileToCheck.getLocaleID();
244         LanguageTagParser ltp = new LanguageTagParser().set(localeID);
245         String lang = ltp.getLanguage();
246         UnicodeSet exemplars = cldrFileToCheck.getExemplarSet("main", CLDRFile.WinningChoice.WINNING);
247 
248         // Don't skip non-Latin, because the exemplar set will only have warning
249 
250         if (lang.equals("en") || lang.equals("root")) {// || exemplars != null && ASCII_LETTER.containsNone(exemplars)) {
251             setSkipTest(true);
252             if (DEBUG) {
253                 System.out.println("CheckForCopy: Skipping: " + localeID);
254             }
255             return this;
256         }
257 
258         super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
259         return this;
260     }
261 }
262