1 package org.unicode.cldr.test; 2 3 import java.util.List; 4 import java.util.Map; 5 import java.util.Map.Entry; 6 import java.util.Set; 7 8 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 9 import org.unicode.cldr.util.CLDRFile; 10 import org.unicode.cldr.util.CLDRFile.Status; 11 import org.unicode.cldr.util.CldrUtility; 12 import org.unicode.cldr.util.Factory; 13 import org.unicode.cldr.util.LanguageTagParser; 14 import org.unicode.cldr.util.RegexLookup; 15 import org.unicode.cldr.util.XPathParts; 16 17 import com.google.common.collect.ImmutableSet; 18 import com.ibm.icu.lang.CharSequences; 19 import com.ibm.icu.text.UnicodeSet; 20 import com.ibm.icu.util.ICUException; 21 22 public class CheckForCopy extends FactoryCheckCLDR { 23 24 private static final boolean DEBUG = CldrUtility.getProperty("DEBUG", false); 25 CheckForCopy(Factory factory)26 public CheckForCopy(Factory factory) { 27 super(factory); 28 } 29 30 private static final RegexLookup<Boolean> skip = new RegexLookup<Boolean>() 31 .add("/(availableFormats" + 32 "|exponential" + 33 "|nan" + 34 "|availableFormats" + 35 "|intervalFormatItem" + 36 "|exemplarCharacters\\[@type=\"(currencySymbol|index)\"]" + 37 "|scientificFormat" + 38 "|timeZoneNames/(hourFormat|gmtFormat|gmtZeroFormat)" + 39 "|dayPeriod" + 40 "|(monthWidth|dayWidth|quarterWidth)\\[@type=\"(narrow|abbreviated)\"]" + 41 "|exemplarCity" + 42 // "|localeDisplayNames/(scripts|territories)" + 43 "|currency\\[@type=\"[A-Z]+\"]/symbol" + 44 "|pattern" + 45 "|field\\[@type=\"dayperiod\"]" + 46 "|defaultNumberingSystem" + 47 "|otherNumberingSystems" + 48 "|exemplarCharacters" + 49 "|durationUnitPattern" + 50 "|coordinateUnitPattern" + 51 "|unitLength\\[@type=\"(short|narrow)\"\\]/unit\\[@type=\"[^\"]++\"\\]/unitPattern\\[@count=\"[^\"]++\"\\]" + 52 "|unitLength\\[@type=\"(short|narrow)\"\\]/unit\\[@type=\"[^\"]++\"\\]/perUnitPattern" + 53 ")", true) 54 .add("^//ldml/dates/calendars/calendar\\[@type=\"gregorian\"]", false) 55 .add("^//ldml/dates/calendars/calendar", true); 56 57 private static final RegexLookup<Boolean> SKIP_CODE_CHECK = new RegexLookup<Boolean>() 58 .add("^//ldml/characterLabels/characterLabel", true) 59 .add("^//ldml/dates/fields/field\\[@type=\"(era|week|minute|quarter)\"]/displayName", true) 60 .add("^//ldml/localeDisplayNames/scripts/script\\[@type=\"(Jamo|Thai|Ahom|Loma|Moon|Newa)\"]", true) 61 .add("^//ldml/localeDisplayNames/languages/language\\[@type=\"(fon|gan|luo|tiv|yao|vai)\"]", true) 62 .add("^//ldml/dates/timeZoneNames/metazone\\[@type=\"GMT\"]", true) 63 .add("^//ldml/localeDisplayNames/territories/territory\\[@type=\"[^\"]*+\"]\\[@alt=\"short\"]", true) 64 .add("^//ldml/localeDisplayNames/measurementSystemNames/measurementSystemName", true) 65 .add("^//ldml/localeDisplayNames/types/type\\[@key=\"collation\"]\\[@type=\"standard\"]", true) 66 ; 67 68 private static final Set<String> SKIP_TYPES = ImmutableSet.of( 69 "CHF", "EUR", "XPD", 70 "Vaii", "Yiii", "Thai", 71 "SAAHO", "BOONT", "SCOUSE", 72 "fon", "ijo", "luo", "tiv", "yao", "zu", "zza", "tw", "ur", "vo", "ha", "hi", "ig", "yo", "ak", "vai", 73 "eo", "af", 74 "Cuba", 75 // languages that are the same in English as in themselves 76 // and countries that have the same name as English in one of their official languages. 77 "af", // Afrikaans 78 "ak", // Akan 79 "AD", // Andorra 80 "LI", // Liechtenstein 81 "NA", // Namibia 82 "AR", // Argentina 83 "CO", // Colombia 84 "VE", // Venezuela 85 "CL", // Chile 86 "CU", // Cuba 87 "EC", // Ecuador 88 "GT", // Guatemala 89 "BO", // Bolivia 90 "HN", // Honduras 91 "SV", // El Salvador 92 "CR", // Costa Rica 93 "PR", // Puerto Rico 94 "NI", // Nicaragua 95 "UY", // Uruguay 96 "PY", // Paraguay 97 "fil", // Filipino 98 "FR", // France 99 "MG", // Madagascar 100 "CA", // Canada 101 "CI", // Côte d’Ivoire 102 "BI", // Burundi 103 "ML", // Mali 104 "TG", // Togo 105 "NE", // Niger 106 "BF", // Burkina Faso 107 "RE", // Réunion 108 "GA", // Gabon 109 "LU", // Luxembourg 110 "MQ", // Martinique 111 "GP", // Guadeloupe 112 "YT", // Mayotte 113 "VU", // Vanuatu 114 "SC", // Seychelles 115 "MC", // Monaco 116 "DJ", // Djibouti 117 "RW", // Rwanda 118 "ha", // Hausa 119 "ID", // Indonesia 120 "ig", // Igbo 121 "NG", // Nigeria 122 "SM", // San Marino 123 "kln", // Kalenjin 124 "mg", // Malagasy 125 "MY", // Malaysia 126 "BN", // Brunei 127 "MT", // Malta 128 "ZW", // Zimbabwe 129 "SR", // Suriname 130 "AW", // Aruba 131 "PT", // Portugal 132 "AO", // Angola 133 "TL", // Timor-Leste 134 "RS", // Serbia 135 "rw", // Kinyarwanda 136 "RW", // Rwanda 137 "ZW", // Zimbabwe 138 "FI", // Finland 139 "TZ", // Tanzania 140 "KE", // Kenya 141 "UG", // Uganda 142 "TO", // Tonga 143 "wae", // Walser 144 "metric"); 145 146 static UnicodeSet ASCII_LETTER = new UnicodeSet("[a-zA-Z]"); 147 148 enum Failure { 149 ok, same_as_english, same_as_code 150 } 151 handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)152 public CheckCLDR handleCheck(String path, String fullPath, String value, 153 Options options, List<CheckStatus> result) { 154 155 if (fullPath == null || value == null) return this; // skip paths that we don't have 156 if (value.contentEquals("Hanb")) { 157 int debug = 0; 158 } 159 160 Status status = new Status(); 161 162 String loc = getCldrFileToCheck().getSourceLocaleID(path, status); 163 if (!getCldrFileToCheck().getLocaleID().equals(loc) || !path.equals(status.pathWhereFound)) { 164 String topStringValue = getCldrFileToCheck().getUnresolved().getStringValue(path); 165 if (!CldrUtility.INHERITANCE_MARKER.equals(topStringValue)) { 166 return this; 167 } 168 } 169 170 if (Boolean.TRUE == skip.get(path)) { 171 return this; 172 } 173 174 Failure failure = Failure.ok; 175 176 String english = getDisplayInformation().getStringValue(path); 177 if (CharSequences.equals(english, value)) { 178 if (ASCII_LETTER.containsSome(english)) { 179 failure = Failure.same_as_english; 180 } 181 } 182 183 // Check for attributes. 184 // May override English test 185 if (Boolean.TRUE != SKIP_CODE_CHECK.get(path)) { 186 XPathParts parts = XPathParts.getFrozenInstance(path); 187 188 int elementCount = parts.size(); 189 for (int i = 2; i < elementCount; ++i) { 190 Map<String, String> attributes = parts.getAttributes(i); 191 for (Entry<String, String> attributeEntry : attributes.entrySet()) { 192 final String attributeValue = attributeEntry.getValue(); 193 // if (SKIP_TYPES.contains(attributeValue)) { 194 // failure = Failure.ok; // override English test 195 // break; 196 // } 197 try { 198 if (value.equals(attributeValue)) { 199 failure = Failure.same_as_code; 200 break; 201 } 202 } catch (NullPointerException e) { 203 throw new ICUException("Value: " + value + "\nattributeValue: " + attributeValue 204 + "\nPath: " + path, e); 205 } 206 } 207 } 208 } 209 210 switch (failure) { 211 case same_as_english: 212 result 213 .add(new CheckStatus() 214 .setCause(this) 215 .setMainType(CheckStatus.warningType) 216 .setSubtype(Subtype.sameAsEnglish) 217 .setCheckOnSubmit(false) 218 .setMessage( 219 "The value is the same as in English: see <a target='CLDR-ST-DOCS' href='http://cldr.org/translation/fixing-errors'>Fixing Errors and Warnings</a>.", 220 new Object[] {})); 221 break; 222 case same_as_code: 223 result 224 .add(new CheckStatus() 225 .setCause(this) 226 .setMainType(CheckStatus.errorType) 227 .setSubtype(Subtype.sameAsCode) 228 .setCheckOnSubmit(false) 229 .setMessage( 230 "The value is the same as the 'code': see <a target='CLDR-ST-DOCS' href='http://cldr.org/translation/fixing-errors'>Fixing Errors and Warnings</a>.", 231 new Object[] {})); 232 break; 233 default: 234 } 235 return this; 236 } 237 238 @Override setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)239 public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, 240 List<CheckStatus> possibleErrors) { 241 if (cldrFileToCheck == null) return this; 242 243 final String localeID = cldrFileToCheck.getLocaleID(); 244 LanguageTagParser ltp = new LanguageTagParser().set(localeID); 245 String lang = ltp.getLanguage(); 246 UnicodeSet exemplars = cldrFileToCheck.getExemplarSet("main", CLDRFile.WinningChoice.WINNING); 247 248 // Don't skip non-Latin, because the exemplar set will only have warning 249 250 if (lang.equals("en") || lang.equals("root")) {// || exemplars != null && ASCII_LETTER.containsNone(exemplars)) { 251 setSkipTest(true); 252 if (DEBUG) { 253 System.out.println("CheckForCopy: Skipping: " + localeID); 254 } 255 return this; 256 } 257 258 super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 259 return this; 260 } 261 } 262