• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 2015-2016, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 package ohos.global.icu.impl.locale;
11 
12 import java.util.Arrays;
13 import java.util.EnumSet;
14 import java.util.HashSet;
15 import java.util.Set;
16 import java.util.regex.Pattern;
17 
18 import ohos.global.icu.impl.ValidIdentifiers;
19 import ohos.global.icu.impl.ValidIdentifiers.Datasubtype;
20 import ohos.global.icu.impl.ValidIdentifiers.Datatype;
21 import ohos.global.icu.impl.locale.KeyTypeData.ValueType;
22 import ohos.global.icu.util.IllformedLocaleException;
23 import ohos.global.icu.util.Output;
24 import ohos.global.icu.util.ULocale;
25 
26 /**
27  * @author markdavis
28  * @hide exposed on OHOS
29  *
30  */
31 public class LocaleValidityChecker {
32     private final Set<Datasubtype> datasubtypes;
33     private final boolean allowsDeprecated;
34     /**
35      * @hide exposed on OHOS
36      */
37     public static class Where {
38         public Datatype fieldFailure;
39         public String codeFailure;
40 
set(Datatype datatype, String code)41         public boolean set(Datatype datatype, String code) {
42             fieldFailure = datatype;
43             codeFailure = code;
44             return false;
45         }
46         @Override
toString()47         public String toString() {
48             return fieldFailure == null ? "OK" : "{" + fieldFailure + ", " + codeFailure + "}";
49         }
50     }
51 
LocaleValidityChecker(Set<Datasubtype> datasubtypes)52     public LocaleValidityChecker(Set<Datasubtype> datasubtypes) {
53         this.datasubtypes = EnumSet.copyOf(datasubtypes);
54         allowsDeprecated = datasubtypes.contains(Datasubtype.deprecated);
55     }
56 
LocaleValidityChecker(Datasubtype... datasubtypes)57     public LocaleValidityChecker(Datasubtype... datasubtypes) {
58         this.datasubtypes = EnumSet.copyOf(Arrays.asList(datasubtypes));
59         allowsDeprecated = this.datasubtypes.contains(Datasubtype.deprecated);
60     }
61 
62     /**
63      * @return the datasubtypes
64      */
getDatasubtypes()65     public Set<Datasubtype> getDatasubtypes() {
66         return EnumSet.copyOf(datasubtypes);
67     }
68 
69     static Pattern SEPARATOR = Pattern.compile("[-_]");
70 
71     @SuppressWarnings("unused")
72     private static final Pattern VALID_X = Pattern.compile("[a-zA-Z0-9]{2,8}(-[a-zA-Z0-9]{2,8})*");
73 
isValid(ULocale locale, Where where)74     public boolean isValid(ULocale locale, Where where) {
75         where.set(null, null);
76         final String language = locale.getLanguage();
77         final String script = locale.getScript();
78         final String region = locale.getCountry();
79         final String variantString = locale.getVariant();
80         final Set<Character> extensionKeys = locale.getExtensionKeys();
81         //        if (language.isEmpty()) {
82         //            // the only case where this is valid is if there is only an 'x' extension string
83         //            if (!script.isEmpty() || !region.isEmpty() || variantString.isEmpty()
84         //                    || extensionKeys.size() != 1 || !extensionKeys.contains('x')) {
85         //                return where.set(Datatype.x, "Null language only with x-...");
86         //            }
87         //            return true; // for x string, wellformedness = valid
88         //        }
89         if (!isValid(Datatype.language, language, where)) {
90             // special case x
91             if (language.equals("x")) {
92                 where.set(null, null); // for x, well-formed == valid
93                 return true;
94             }
95             return false;
96         }
97         if (!isValid(Datatype.script, script, where)) return false;
98         if (!isValid(Datatype.region, region, where)) return false;
99         if (!variantString.isEmpty()) {
100             for (String variant : SEPARATOR.split(variantString)) {
101                 if (!isValid(Datatype.variant, variant, where)) return false;
102             }
103         }
104         for (Character c : extensionKeys) {
105             try {
106                 Datatype datatype = Datatype.valueOf(c+"");
107                 switch (datatype) {
108                 case x:
109                     return true; // if it is syntactic (checked by ULocale) it is valid
110                 case t:
111                 case u:
112                     if (!isValidU(locale, datatype, locale.getExtension(c), where)) return false;
113                     break;
114                 default:
115                     break;
116                 }
117             } catch (Exception e) {
118                 return where.set(Datatype.illegal, c+"");
119             }
120         }
121         return true;
122     }
123 
124     // TODO combine this with the KeyTypeData.SpecialType, and get it from the type, not the key
125     enum SpecialCase {
126         normal, anything, reorder, codepoints, subdivision, rgKey;
get(String key)127         static SpecialCase get(String key) {
128             if (key.equals("kr")) {
129                 return reorder;
130             } else if (key.equals("vt")) {
131                 return codepoints;
132             } else if (key.equals("sd")) {
133                 return subdivision;
134             } else if (key.equals("rg")) {
135                 return rgKey;
136             } else if (key.equals("x0")) {
137                 return anything;
138             } else {
139                 return normal;
140             }
141         }
142     }
143 
144     /**
145      * @param locale
146      * @param datatype
147      * @param extension
148      * @param where
149      * @return
150      */
isValidU(ULocale locale, Datatype datatype, String extensionString, Where where)151     private boolean isValidU(ULocale locale, Datatype datatype, String extensionString, Where where) {
152         String key = "";
153         int typeCount = 0;
154         ValueType valueType = null;
155         SpecialCase specialCase = null;
156         StringBuilder prefix = new StringBuilder();
157         Set<String> seen = new HashSet<String>();
158 
159         StringBuilder tBuffer = datatype == Datatype.t ? new StringBuilder() : null;
160 
161         // TODO: is empty -u- valid?
162 
163         for (String subtag : SEPARATOR.split(extensionString)) {
164             if (subtag.length() == 2
165                     && (tBuffer == null || subtag.charAt(1) <= '9')) {
166                 // if we have accumulated a t buffer, check that first
167                 if (tBuffer != null) {
168                     // Check t buffer. Empty after 't' is ok.
169                     if (tBuffer.length() != 0 && !isValidLocale(tBuffer.toString(),where)) {
170                         return false;
171                     }
172                     tBuffer = null;
173                 }
174                 key = KeyTypeData.toBcpKey(subtag);
175                 if (key == null) {
176                     return where.set(datatype, subtag);
177                 }
178                 if (!allowsDeprecated && KeyTypeData.isDeprecated(key)) {
179                     return where.set(datatype, key);
180                 }
181                 valueType = KeyTypeData.getValueType(key);
182                 specialCase = SpecialCase.get(key);
183                 typeCount = 0;
184             } else if (tBuffer != null) {
185                 if (tBuffer.length() != 0) {
186                     tBuffer.append('-');
187                 }
188                 tBuffer.append(subtag);
189             } else {
190                 ++typeCount;
191                 switch (valueType) {
192                 case single:
193                     if (typeCount > 1) {
194                         return where.set(datatype, key+"-"+subtag);
195                     }
196                     break;
197                 case incremental:
198                     if (typeCount == 1) {
199                         prefix.setLength(0);
200                         prefix.append(subtag);
201                     } else {
202                         prefix.append('-').append(subtag);
203                         subtag = prefix.toString();
204                     }
205                     break;
206                 case multiple:
207                     if (typeCount == 1) {
208                         seen.clear();
209                     }
210                     break;
211                 default:
212                     break;
213                 }
214                 switch (specialCase) {
215                 case anything:
216                     continue;
217                 case codepoints:
218                     try {
219                         if (Integer.parseInt(subtag,16) > 0x10FFFF) {
220                             return where.set(datatype, key+"-"+subtag);
221                         }
222                     } catch (NumberFormatException e) {
223                         return where.set(datatype, key+"-"+subtag);
224                     }
225                     continue;
226                 case reorder:
227                     boolean newlyAdded = seen.add(subtag.equals("zzzz") ? "others" : subtag);
228                     if (!newlyAdded || !isScriptReorder(subtag)) {
229                         return where.set(datatype, key+"-"+subtag);
230                     }
231                     continue;
232                 case subdivision:
233                     if (!isSubdivision(locale, subtag)) {
234                         return where.set(datatype, key+"-"+subtag);
235                     }
236                     continue;
237                 case rgKey:
238                     if (subtag.length() < 6 || !subtag.endsWith("zzzz")) {
239                         return where.set(datatype, subtag);
240                     }
241                     if (!isValid(Datatype.region, subtag.substring(0,subtag.length()-4), where)) {
242                         return false;
243                     }
244                     continue;
245                 default:
246                     break;
247                 }
248 
249                 // en-u-sd-usca
250                 // en-US-u-sd-usca
251                 Output<Boolean> isKnownKey = new Output<Boolean>();
252                 Output<Boolean> isSpecialType = new Output<Boolean>();
253                 String type = KeyTypeData.toBcpType(key, subtag, isKnownKey, isSpecialType);
254                 if (type == null) {
255                     return where.set(datatype, key+"-"+subtag);
256                 }
257                 if (!allowsDeprecated && KeyTypeData.isDeprecated(key, subtag)) {
258                     return where.set(datatype, key+"-"+subtag);
259                 }
260             }
261         }
262         // Check t buffer. Empty after 't' is ok.
263         if (tBuffer != null && tBuffer.length() != 0 && !isValidLocale(tBuffer.toString(),where)) {
264             return false;
265         }
266         return true;
267     }
268 
269     /**
270      * @param locale
271      * @param subtag
272      * @return
273      */
isSubdivision(ULocale locale, String subtag)274     private boolean isSubdivision(ULocale locale, String subtag) {
275         // First check if the subtag is valid
276         if (subtag.length() < 3) {
277             return false;
278         }
279         String region = subtag.substring(0, subtag.charAt(0) <= '9' ? 3 : 2);
280         String subdivision = subtag.substring(region.length());
281         if (ValidIdentifiers.isValid(Datatype.subdivision, datasubtypes, region, subdivision) == null) {
282             return false;
283         }
284         // Then check for consistency with the locale's region
285         String localeRegion = locale.getCountry();
286         if (localeRegion.isEmpty()) {
287             ULocale max = ULocale.addLikelySubtags(locale);
288             localeRegion = max.getCountry();
289         }
290         if (!region.equalsIgnoreCase(localeRegion)) {
291             return false;
292         }
293         return true;
294     }
295 
296     static final Set<String> REORDERING_INCLUDE = new HashSet<String>(Arrays.asList("space", "punct", "symbol", "currency", "digit", "others", "zzzz"));
297     static final Set<String> REORDERING_EXCLUDE = new HashSet<String>(Arrays.asList("zinh", "zyyy"));
298     static final Set<Datasubtype> REGULAR_ONLY = EnumSet.of(Datasubtype.regular);
299     /**
300      * @param subtag
301      * @return
302      */
isScriptReorder(String subtag)303     private boolean isScriptReorder(String subtag) {
304         subtag = AsciiUtil.toLowerString(subtag);
305         if (REORDERING_INCLUDE.contains(subtag)) {
306             return true;
307         } else if (REORDERING_EXCLUDE.contains(subtag)) {
308             return false;
309         }
310         return ValidIdentifiers.isValid(Datatype.script, REGULAR_ONLY, subtag) != null;
311         //        space, punct, symbol, currency, digit - core groups of characters below 'a'
312         //        any script code except Common and Inherited.
313         //      sc ; Zinh                             ; Inherited                        ; Qaai
314         //      sc ; Zyyy                             ; Common
315         //        Some pairs of scripts sort primary-equal and always reorder together. For example, Katakana characters are are always reordered with Hiragana.
316         //        others - where all codes not explicitly mentioned should be ordered. The script code Zzzz (Unknown Script) is a synonym for others.        return false;
317     }
318 
319     /**
320      * @param extensionString
321      * @param where
322      * @return
323      */
isValidLocale(String extensionString, Where where)324     private boolean isValidLocale(String extensionString, Where where) {
325         try {
326             ULocale locale = new ULocale.Builder().setLanguageTag(extensionString).build();
327             return isValid(locale, where);
328         } catch (IllformedLocaleException e) {
329             int startIndex = e.getErrorIndex();
330             String[] list = SEPARATOR.split(extensionString.substring(startIndex));
331             return where.set(Datatype.t, list[0]);
332         } catch (Exception e) {
333             return where.set(Datatype.t, e.getMessage());
334         }
335     }
336 
337     /**
338      * @param datatype
339      * @param code
340      * @param where
341      * @return
342      */
isValid(Datatype datatype, String code, Where where)343     private boolean isValid(Datatype datatype, String code, Where where) {
344         if (code.isEmpty()) {
345             return true;
346         }
347 
348         // Note:
349         // BCP 47 -u- locale extension '-u-va-posix' is mapped to variant 'posix' automatically.
350         // For example, ULocale.forLanguageTag("en-u-va-posix").getVariant() returns "posix".
351         // This is only the exceptional case when -u- locale extension is mapped to a subtag type
352         // other than keyword.
353         //
354         // The locale validity data is based on IANA language subtag registry data and "posix"
355         // is not a valid variant. So we need to handle this specific case here. There are no
356         // othe exceptions.
357         if (datatype == Datatype.variant && "posix".equalsIgnoreCase(code)) {
358             return true;
359         }
360 
361         return ValidIdentifiers.isValid(datatype, datasubtypes, code) != null ?
362                 true : (where == null ? false : where.set(datatype, code));
363     }
364 }
365