• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.test;
2 
3 import com.google.common.cache.CacheBuilder;
4 import com.google.common.cache.CacheLoader;
5 import com.google.common.cache.LoadingCache;
6 import com.google.common.collect.ImmutableSet;
7 import com.google.common.collect.ImmutableSortedSet;
8 import java.util.Collections;
9 import java.util.EnumSet;
10 import java.util.HashSet;
11 import java.util.Set;
12 import java.util.concurrent.ExecutionException;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15 import org.unicode.cldr.util.CLDRConfig;
16 import org.unicode.cldr.util.CLDRLocale;
17 import org.unicode.cldr.util.GrammarInfo;
18 import org.unicode.cldr.util.Level;
19 import org.unicode.cldr.util.Organization;
20 import org.unicode.cldr.util.RegexUtilities;
21 import org.unicode.cldr.util.StandardCodes;
22 import org.unicode.cldr.util.SupplementalDataInfo;
23 import org.unicode.cldr.util.VoterReportStatus;
24 import org.unicode.cldr.util.VoterReportStatus.ReportId;
25 
26 /**
27  * This class manages the Limited Submission process.
28  *
29  * <p>TODO: see https://unicode-org.atlassian.net/browse/CLDR-15230 for TODOs here
30  *
31  * @see CheckCLDR#LIMITED_SUBMISSION
32  */
33 public final class SubmissionLocales {
34     // TODO:  On the use of Locales.txt here, see
35     // https://unicode-org.atlassian.net/browse/CLDR-14838
36     /** This is the 'raw' list from Locales.txt */
37     public static final Set<String> CLDR_LOCALES =
38             StandardCodes.make().getLocaleToLevel(Organization.cldr).keySet();
39 
40     /** This is the 'special' list from Locales.txt */
41     public static final Set<String> SPECIAL_ORG_LOCALES =
42             StandardCodes.make().getLocaleToLevel(Organization.special).keySet();
43 
44     /**
45      * Non-CLDR Locales, but consistently have high level of engagement from volunteers to keep at
46      * modern level. Reevaluate for each release based on meeting 95+% of modern, moderate, and
47      * basic coverage
48      */
49     public static Set<String> HIGH_LEVEL_LOCALES =
50             ImmutableSet.of(
51                     // Note: ALL of these were found in Locales.txt under cldr.
52                     "chr", // Cherokee
53                     "gd", // Scottish Gaelic, Gaelic
54                     "fo", // Faroese
55                     "kok", // Konkani
56                     "pcm", // Nigerian Pidgin
57                     "ha", // Hausa
58                     "hsb", // Upper Sorbian
59                     "dsb", // Lower Sorbian
60                     "yue_Hans", // Cantonese (Simplified)
61                     "to" //  Tongan
62                     );
63 
64     public static final Set<String> CLDR_OR_HIGH_LEVEL_LOCALES =
65             ImmutableSet.<String>builder().addAll(CLDR_LOCALES).addAll(HIGH_LEVEL_LOCALES).build();
66 
67     /** Subset of reports open for this release */
68     private static final Set<ReportId> LIMITED_SUBMISSION_REPORTS =
69             Collections.unmodifiableSet(EnumSet.of(VoterReportStatus.ReportId.personnames));
70 
71     /** Subset of CLDR_LOCALES, minus special which are only those which are TC orgs */
72     public static final Set<String> TC_ORG_LOCALES;
73 
74     /** Space-separated list of TC locales to extend submission */
75     public static final String DEFAULT_EXTENDED_SUBMISSION = "";
76 
77     /** Additional TC locales which have extended submission. Do not add non-tc locales here. */
78     public static final Set<String> ADDITIONAL_EXTENDED_SUBMISSION =
79             ImmutableSet.copyOf(
80                     CLDRConfig.getInstance()
81                             .getProperty("CLDR_EXTENDED_SUBMISSION", "")
82                             .split(" "));
83 
84     /**
85      * Set to true iff ONLY grammar locales should be limited submission {@link
86      * GrammarInfo#getGrammarLocales()}
87      */
88     public static final boolean ONLY_GRAMMAR_LOCALES = false;
89 
90     /** Update this in each limited release. */
91     public static final Set<String> LOCALES_FOR_LIMITED;
92 
93     static {
94         Set<String> temp = new HashSet<>(CLDR_OR_HIGH_LEVEL_LOCALES);
95         if (ONLY_GRAMMAR_LOCALES) {
GrammarInfo.getGrammarLocales()96             temp.retainAll(GrammarInfo.getGrammarLocales());
97         }
98         LOCALES_FOR_LIMITED = ImmutableSortedSet.copyOf(temp);
99 
100         Set<String> temp2 = new HashSet<>(CLDR_LOCALES);
101         temp2.removeAll(SPECIAL_ORG_LOCALES);
102         TC_ORG_LOCALES = ImmutableSortedSet.copyOf(temp2);
103     }
104 
105     /**
106      * New locales in this release, where we want to allow any paths even if others are restricted
107      */
108     public static Set<String> ALLOW_ALL_PATHS_BASIC =
109             ImmutableSet.of(
110                     // locales open for v43:
111                     "apc", // Levantine Arabic; NB actual submission was "ajp" South Levantine
112                     // Arabic
113                     "lmo", // Lombardi
114                     "pap", // Papiamento
115                     "rif" // Riffian
116                     );
117 
118     public static Set<String> LOCALES_ALLOWED_IN_LIMITED =
119             ImmutableSet.<String>builder()
120                     .addAll(LOCALES_FOR_LIMITED)
121                     .addAll(ALLOW_ALL_PATHS_BASIC)
122                     .build();
123 
124     public static final Pattern PATHS_ALLOWED_IN_LIMITED =
125             Pattern.compile(
126                     "//ldml/"
127                             // v43: All person names
128                             + "(personNames/.*"
129                             // v43: Turkey and its alternate
130                             + "|localeDisplayNames/territories/territory\\[@type=\"TR\"\\].*"
131                             // v43: Exemplar city for America/Ciudad_Juarez
132                             + "|dates/timeZoneNames/zone[@type=\"America/Ciudad_Juarez\"]/exemplarCity"
133                             + ")");
134 
135     // Pattern.compile("//ldml/units/unitLength\\[@type=\"long\"]");
136 
137     /* Example of special paths
138     * Pattern.compile(
139        "//ldml/"
140            + "(listPatterns/listPattern\\[@type=\"standard"
141            + "|annotations/annotation\\[@cp=\"([©®‼⁉☑✅✔✖✨✳✴❇❌❎❓-❕❗❣ ➕-➗��-��������������������������⭕��������������⭕��������������������]|��‍♀|��‍♂)\""
142            + "|localeDisplayNames/"
143            +   "(scripts/script\\[@type=\"(Elym|Hmnp|Nand|Wcho)\""
144            +    "|territories/territory\\[@type=\"(MO|SZ)\"](\\[@alt=\"variant\"])?"
145            +    "|types/type\\[@key=\"numbers\"]\\[@type=\"(hmnp|wcho)\"]"
146            +   ")"
147            + "|dates/timeZoneNames/(metazone\\[@type=\"Macau\"]"
148            +   "|zone\\[@type=\"Asia/Macau\"]"
149            +   ")"
150            + ")"
151            );
152            */
153 
154     // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/daylight, old: Macau Summer Time, new:
155     // Macao Summer Time
156     // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/standard, old: Macau Standard Time,
157     // new: Macao Standard Time
158     // ldml/localeDisplayNames/territories/territory[@type="SZ"][@alt="variant"], old: SZ, new:
159     // Swaziland
160     // ldml/dates/timeZoneNames/zone[@type="Asia/Macau"]/exemplarCity, old: Macau, new: Macao
161     // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/generic, old: Macau Time, new: Macao
162     // Time
163     // ldml/localeDisplayNames/territories/territory[@type="SZ"], old: Swaziland, new: Eswatini
164 
165     private static final class SubmissionLocalesCache {
166         public static SubmissionLocalesCache INSTANCE = new SubmissionLocalesCache();
167         private LoadingCache<String, CoverageLevel2> covs;
168 
SubmissionLocalesCache()169         SubmissionLocalesCache() {
170             covs =
171                     CacheBuilder.newBuilder()
172                             .build(
173                                     new CacheLoader<String, CoverageLevel2>() {
174                                         @Override
175                                         public CoverageLevel2 load(String key) throws Exception {
176                                             return CoverageLevel2.getInstance(
177                                                     SupplementalDataInfo.getInstance(), key);
178                                         }
179                                     });
180         }
181 
getCoverageLevel(String localeString, String path)182         public static Enum<Level> getCoverageLevel(String localeString, String path) {
183             try {
184                 return INSTANCE.covs.get(localeString).getLevel(path);
185             } catch (ExecutionException e) {
186                 throw new RuntimeException(
187                         String.format("Could not fetch coverage for %s:%s", localeString, path), e);
188             }
189         }
190     }
191 
192     /**
193      * Only call this if {@link CheckCLDR#LIMITED_SUBMISSION}
194      *
195      * @param localeString
196      * @param path
197      * @param isError
198      * @param isMissing
199      * @return true if submission is allowed, else false
200      * @see CheckCLDR#LIMITED_SUBMISSION
201      */
allowEvenIfLimited( String localeString, String path, boolean isError, boolean isMissing)202     public static boolean allowEvenIfLimited(
203             String localeString, String path, boolean isError, boolean isMissing) {
204 
205         // Allow errors to be fixed
206         if (isError) {
207             return true;
208         }
209 
210         // for new locales, allow basic paths
211         if (SubmissionLocales.ALLOW_ALL_PATHS_BASIC.contains(localeString)
212                 &&
213                 // Only check coverage level for these locales
214                 isPathBasicOrLess(localeString, path)) {
215             return true;
216         }
217 
218         // all but specific locales are otherwise locked
219         if (!SubmissionLocales.LOCALES_ALLOWED_IN_LIMITED.contains(localeString)) {
220             return false;
221         }
222 
223         // in TC Org locales, lock all paths except missing and special
224         if (isMissing && TC_ORG_LOCALES.contains(localeString)) {
225             return true;
226         }
227 
228         if (pathAllowedInLimitedSubmission(path)) {
229             return true;
230         }
231 
232         return false; // skip
233     }
234 
isPathBasicOrLess(String localeString, String path)235     private static boolean isPathBasicOrLess(String localeString, String path) {
236         return SubmissionLocalesCache.getCoverageLevel(localeString, path).compareTo(Level.BASIC)
237                 <= 0;
238     }
239 
240     private static final boolean DEBUG_REGEX = false;
241 
242     /**
243      * Only public for testing
244      *
245      * @param path
246      * @return
247      */
pathAllowedInLimitedSubmission(String path)248     public static boolean pathAllowedInLimitedSubmission(String path) {
249         if (PATHS_ALLOWED_IN_LIMITED == null) {
250             return false;
251         }
252         final Matcher matcher = SubmissionLocales.PATHS_ALLOWED_IN_LIMITED.matcher(path);
253         boolean result = matcher.lookingAt();
254         if (DEBUG_REGEX && !result) {
255             System.out.println(RegexUtilities.showMismatch(matcher, path));
256         }
257         return result;
258     }
259 
getReportsAvailableInLimited()260     public static Set<ReportId> getReportsAvailableInLimited() {
261         return LIMITED_SUBMISSION_REPORTS;
262     }
263 
264     /**
265      * @returns true if the locale or its parent is considered a TC Org Locale. Returns true for
266      *     ROOT.
267      */
isTcLocale(CLDRLocale loc)268     public static boolean isTcLocale(CLDRLocale loc) {
269         if (loc == CLDRLocale.ROOT
270                 || SubmissionLocales.TC_ORG_LOCALES.contains(loc.getBaseName())) {
271             // root or explicitly listed locale is a TC locale
272             return true;
273         } else if (loc.isParentRoot()) {
274             // any sublocale of root not listed is not a tc locale
275             return false;
276         } else {
277             return isTcLocale(loc.getParent());
278         }
279     }
280 
281     /**
282      * @returns true if the locale or its parent is considered a TC Org Locale. Returns true for
283      *     ROOT.
284      */
isOpenForExtendedSubmission(CLDRLocale loc)285     public static boolean isOpenForExtendedSubmission(CLDRLocale loc) {
286         if (loc == CLDRLocale.ROOT) {
287             return false; // root is never open
288         } else if (SubmissionLocales.ADDITIONAL_EXTENDED_SUBMISSION.contains(loc.getBaseName())) {
289             // explicitly listed locale is a open for additional
290             return true;
291         } else if (SubmissionLocales.TC_ORG_LOCALES.contains(loc.getBaseName())) {
292             // TC locale but not listed as extended - NOT open for extended submission.
293             return false;
294         } else if (loc.isParentRoot()) {
295             // Not a TC locale, so it's open.
296             return true;
297         } else {
298             // child locale of an open locale is open
299             return isOpenForExtendedSubmission(loc.getParent());
300         }
301     }
302 }
303