package org.unicode.cldr.test;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.GrammarInfo;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.RegexUtilities;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.VoterReportStatus;
import org.unicode.cldr.util.VoterReportStatus.ReportId;

/**
 * This class manages the Limited Submission process.
 *
 * <p>It holds the locale sets and the path pattern that decide which (locale, path) pairs remain
 * open while submission is limited, plus the predicates that consult them.
 *
 * <p>TODO: see https://unicode-org.atlassian.net/browse/CLDR-15230 for TODOs here
 *
 * @see CheckCLDR#LIMITED_SUBMISSION
 */
public final class SubmissionLocales {
    // TODO: On the use of Locales.txt here, see
    // https://unicode-org.atlassian.net/browse/CLDR-14838
    /** This is the 'raw' list from Locales.txt */
    public static final Set<String> CLDR_LOCALES =
            StandardCodes.make().getLocaleToLevel(Organization.cldr).keySet();

    /** This is the 'special' list from Locales.txt */
    public static final Set<String> SPECIAL_ORG_LOCALES =
            StandardCodes.make().getLocaleToLevel(Organization.special).keySet();

    /**
     * Non-CLDR locales that consistently have a high level of engagement from volunteers to keep
     * them at modern level. Reevaluate for each release based on meeting 95+% of modern, moderate,
     * and basic coverage.
     *
     * <p>NOTE(review): the field is not {@code final}; the set itself is immutable. Treat it as
     * read-only.
     */
    public static Set<String> HIGH_LEVEL_LOCALES =
            ImmutableSet.of(
                    // Note: ALL of these were found in Locales.txt under cldr.
                    "chr", // Cherokee
                    "gd", // Scottish Gaelic, Gaelic
                    "fo", // Faroese
                    "kok", // Konkani
                    "pcm", // Nigerian Pidgin
                    "ha", // Hausa
                    "hsb", // Upper Sorbian
                    "dsb", // Lower Sorbian
                    "yue_Hans", // Cantonese (Simplified)
                    "to" // Tongan
                    );

    /** Union of {@link #CLDR_LOCALES} and {@link #HIGH_LEVEL_LOCALES}. */
    public static final Set<String> CLDR_OR_HIGH_LEVEL_LOCALES =
            ImmutableSet.<String>builder().addAll(CLDR_LOCALES).addAll(HIGH_LEVEL_LOCALES).build();

    /** Subset of reports open for this release */
    private static final Set<ReportId> LIMITED_SUBMISSION_REPORTS =
            Collections.unmodifiableSet(EnumSet.of(VoterReportStatus.ReportId.personnames));

    /**
     * {@link #CLDR_LOCALES} minus {@link #SPECIAL_ORG_LOCALES}; i.e. only those locales which
     * belong to TC orgs.
     */
    public static final Set<String> TC_ORG_LOCALES;

    /** Space-separated list of TC locales to extend submission */
    public static final String DEFAULT_EXTENDED_SUBMISSION = "";

    /**
     * Additional TC locales which have extended submission. Do not add non-tc locales here.
     *
     * <p>Read from the {@code CLDR_EXTENDED_SUBMISSION} property as a space-separated list,
     * falling back to {@link #DEFAULT_EXTENDED_SUBMISSION}. Empty tokens are dropped, so an unset
     * property yields an empty set.
     */
    public static final Set<String> ADDITIONAL_EXTENDED_SUBMISSION =
            parseLocaleList(
                    CLDRConfig.getInstance()
                            .getProperty("CLDR_EXTENDED_SUBMISSION", DEFAULT_EXTENDED_SUBMISSION));

    /**
     * Set to true iff ONLY grammar locales should be limited submission {@link
     * GrammarInfo#getGrammarLocales()}
     */
    public static final boolean ONLY_GRAMMAR_LOCALES = false;

    /** Update this in each limited release. */
    public static final Set<String> LOCALES_FOR_LIMITED;

    static {
        // Start from all CLDR + high-level locales; optionally narrow to grammar locales.
        final Set<String> limited = new HashSet<>(CLDR_OR_HIGH_LEVEL_LOCALES);
        if (ONLY_GRAMMAR_LOCALES) {
            limited.retainAll(GrammarInfo.getGrammarLocales());
        }
        LOCALES_FOR_LIMITED = ImmutableSortedSet.copyOf(limited);

        // TC org locales are the cldr locales that are not listed under 'special'.
        final Set<String> tcOrg = new HashSet<>(CLDR_LOCALES);
        tcOrg.removeAll(SPECIAL_ORG_LOCALES);
        TC_ORG_LOCALES = ImmutableSortedSet.copyOf(tcOrg);
    }

    /**
     * New locales in this release, where we want to allow any paths even if others are restricted
     */
    public static Set<String> ALLOW_ALL_PATHS_BASIC =
            ImmutableSet.of(
                    // locales open for v43:
                    "apc", // Levantine Arabic; NB actual submission was "ajp" South Levantine
                    // Arabic
                    "lmo", // Lombardi
                    "pap", // Papiamento
                    "rif" // Riffian
                    );

    /** Union of {@link #LOCALES_FOR_LIMITED} and {@link #ALLOW_ALL_PATHS_BASIC}. */
    public static Set<String> LOCALES_ALLOWED_IN_LIMITED =
            ImmutableSet.<String>builder()
                    .addAll(LOCALES_FOR_LIMITED)
                    .addAll(ALLOW_ALL_PATHS_BASIC)
                    .build();

    /**
     * Paths that stay open during limited submission. Applied with {@link Matcher#lookingAt()},
     * so a path is allowed when the pattern matches a prefix of it.
     *
     * <p>Every literal {@code [} and {@code ]} of an XPath attribute test must be escaped;
     * unescaped, the regex engine treats {@code [...]} as a character class and the intended path
     * never matches.
     */
    public static final Pattern PATHS_ALLOWED_IN_LIMITED =
            Pattern.compile(
                    "//ldml/"
                            // v43: All person names
                            + "(personNames/.*"
                            // v43: Turkey and its alternate
                            + "|localeDisplayNames/territories/territory\\[@type=\"TR\"\\].*"
                            // v43: Exemplar city for America/Ciudad_Juarez
                            + "|dates/timeZoneNames/zone\\[@type=\"America/Ciudad_Juarez\"\\]/exemplarCity"
                            + ")");

    // Pattern.compile("//ldml/units/unitLength\\[@type=\"long\"]");

    /* Example of special paths
     * Pattern.compile(
        "//ldml/"
            + "(listPatterns/listPattern\\[@type=\"standard"
            + "|annotations/annotation\\[@cp=\"([©®‼⁉☑✅✔✖✨✳✴❇❌❎❓-❕❗❣ ➕-➗-⭕⭕]|♀|♂)\""
            + "|localeDisplayNames/"
            + "(scripts/script\\[@type=\"(Elym|Hmnp|Nand|Wcho)\""
            + "|territories/territory\\[@type=\"(MO|SZ)\"](\\[@alt=\"variant\"])?"
            + "|types/type\\[@key=\"numbers\"]\\[@type=\"(hmnp|wcho)\"]"
            + ")"
            + "|dates/timeZoneNames/(metazone\\[@type=\"Macau\"]"
            + "|zone\\[@type=\"Asia/Macau\"]"
            + ")"
            + ")"
            );
            */

    // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/daylight, old: Macau Summer Time, new:
    // Macao Summer Time
    // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/standard, old: Macau Standard Time,
    // new: Macao Standard Time
    // ldml/localeDisplayNames/territories/territory[@type="SZ"][@alt="variant"], old: SZ, new:
    // Swaziland
    // ldml/dates/timeZoneNames/zone[@type="Asia/Macau"]/exemplarCity, old: Macau, new: Macao
    // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/generic, old: Macau Time, new: Macao
    // Time
    // ldml/localeDisplayNames/territories/territory[@type="SZ"], old: Swaziland, new: Eswatini

    /**
     * Splits a space-separated list of locale IDs into an immutable set, trimming each token and
     * skipping empty ones (so {@code ""} or doubled spaces do not produce an empty-string entry).
     *
     * @param spaceSeparated the raw list; may be empty
     * @return the distinct non-empty tokens, in encounter order
     */
    private static Set<String> parseLocaleList(String spaceSeparated) {
        final ImmutableSet.Builder<String> builder = ImmutableSet.builder();
        for (final String token : spaceSeparated.split(" ")) {
            final String id = token.trim();
            if (!id.isEmpty()) {
                builder.add(id);
            }
        }
        return builder.build();
    }

    /** Holder for a per-locale cache of {@link CoverageLevel2} instances. */
    private static final class SubmissionLocalesCache {
        private static final SubmissionLocalesCache INSTANCE = new SubmissionLocalesCache();

        /** Maps a locale ID to the CoverageLevel2 obtained for it; loaded on first request. */
        private final LoadingCache<String, CoverageLevel2> covs;

        SubmissionLocalesCache() {
            covs =
                    CacheBuilder.newBuilder()
                            .build(
                                    new CacheLoader<String, CoverageLevel2>() {
                                        @Override
                                        public CoverageLevel2 load(String key) throws Exception {
                                            return CoverageLevel2.getInstance(
                                                    SupplementalDataInfo.getInstance(), key);
                                        }
                                    });
        }

        /**
         * Looks up the coverage level of a path in a locale via the cache.
         *
         * @param localeString locale ID used as the cache key
         * @param path path whose level is requested
         * @return the level reported by {@link CoverageLevel2#getLevel}
         * @throws RuntimeException wrapping the cause if the cache loader fails
         */
        public static Enum<Level> getCoverageLevel(String localeString, String path) {
            try {
                return INSTANCE.covs.get(localeString).getLevel(path);
            } catch (ExecutionException e) {
                throw new RuntimeException(
                        String.format("Could not fetch coverage for %s:%s", localeString, path), e);
            }
        }
    }

    /**
     * Decides whether a submission is allowed for the given locale and path while submission is
     * limited. Only call this if {@link CheckCLDR#LIMITED_SUBMISSION}
     *
     * <p>The checks are applied in order: errors are always fixable; locales in {@link
     * #ALLOW_ALL_PATHS_BASIC} may edit paths at basic coverage or below; locales outside {@link
     * #LOCALES_ALLOWED_IN_LIMITED} are locked; missing values may be supplied in {@link
     * #TC_ORG_LOCALES}; otherwise the path must match {@link #PATHS_ALLOWED_IN_LIMITED}.
     *
     * @param localeString locale ID to test against the locale sets
     * @param path path to test
     * @param isError if true, the submission is always allowed so that errors can be fixed
     * @param isMissing if true, the submission is allowed in TC org locales (presumably the caller
     *     sets this when the value is missing; confirm against callers)
     * @return true if submission is allowed, else false
     * @see CheckCLDR#LIMITED_SUBMISSION
     */
    public static boolean allowEvenIfLimited(
            String localeString, String path, boolean isError, boolean isMissing) {

        // Allow errors to be fixed
        if (isError) {
            return true;
        }

        // for new locales, allow basic paths
        // (the coverage level is only computed for these locales)
        if (SubmissionLocales.ALLOW_ALL_PATHS_BASIC.contains(localeString)
                && isPathBasicOrLess(localeString, path)) {
            return true;
        }

        // all but specific locales are otherwise locked
        if (!SubmissionLocales.LOCALES_ALLOWED_IN_LIMITED.contains(localeString)) {
            return false;
        }

        // in TC Org locales, lock all paths except missing and special
        if (isMissing && TC_ORG_LOCALES.contains(localeString)) {
            return true;
        }

        // anything else is open only if the path is one of the special paths
        return pathAllowedInLimitedSubmission(path);
    }

    /** Returns true if the path's coverage level in the locale compares at or below BASIC. */
    private static boolean isPathBasicOrLess(String localeString, String path) {
        return SubmissionLocalesCache.getCoverageLevel(localeString, path).compareTo(Level.BASIC)
                <= 0;
    }

    /** When true, mismatching paths are explained on stdout. */
    private static final boolean DEBUG_REGEX = false;

    /**
     * Tests a path against {@link #PATHS_ALLOWED_IN_LIMITED}. Only public for testing
     *
     * @param path the path to test
     * @return true if the pattern matches a prefix of {@code path}; false if it does not, or if
     *     the pattern is null (no special paths in this release)
     */
    public static boolean pathAllowedInLimitedSubmission(String path) {
        if (PATHS_ALLOWED_IN_LIMITED == null) {
            return false;
        }
        final Matcher matcher = SubmissionLocales.PATHS_ALLOWED_IN_LIMITED.matcher(path);
        final boolean result = matcher.lookingAt();
        if (DEBUG_REGEX && !result) {
            System.out.println(RegexUtilities.showMismatch(matcher, path));
        }
        return result;
    }

    /**
     * @return the unmodifiable set of reports available during limited submission
     */
    public static Set<ReportId> getReportsAvailableInLimited() {
        return LIMITED_SUBMISSION_REPORTS;
    }

    /**
     * @param loc the locale to test
     * @return true if the locale or one of its ancestors is listed in {@link #TC_ORG_LOCALES}.
     *     Returns true for ROOT.
     */
    public static boolean isTcLocale(CLDRLocale loc) {
        if (loc == CLDRLocale.ROOT
                || SubmissionLocales.TC_ORG_LOCALES.contains(loc.getBaseName())) {
            // root or explicitly listed locale is a TC locale
            return true;
        } else if (loc.isParentRoot()) {
            // any sublocale of root not listed is not a tc locale
            return false;
        } else {
            return isTcLocale(loc.getParent());
        }
    }

    /**
     * @param loc the locale to test
     * @return true if the locale is open for extended submission: it (or the nearest ancestor
     *     that decides the question) is either listed in {@link #ADDITIONAL_EXTENDED_SUBMISSION}
     *     or is not a TC org locale. Returns false for ROOT.
     */
    public static boolean isOpenForExtendedSubmission(CLDRLocale loc) {
        if (loc == CLDRLocale.ROOT) {
            return false; // root is never open
        } else if (SubmissionLocales.ADDITIONAL_EXTENDED_SUBMISSION.contains(loc.getBaseName())) {
            // explicitly listed locale is open for additional submission
            return true;
        } else if (SubmissionLocales.TC_ORG_LOCALES.contains(loc.getBaseName())) {
            // TC locale but not listed as extended - NOT open for extended submission.
            return false;
        } else if (loc.isParentRoot()) {
            // Not a TC locale, so it's open.
            return true;
        } else {
            // child locale of an open locale is open
            return isOpenForExtendedSubmission(loc.getParent());
        }
    }
}