/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2015-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package ohos.global.icu.impl.locale;

import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import ohos.global.icu.impl.ValidIdentifiers;
import ohos.global.icu.impl.ValidIdentifiers.Datasubtype;
import ohos.global.icu.impl.ValidIdentifiers.Datatype;
import ohos.global.icu.impl.locale.KeyTypeData.ValueType;
import ohos.global.icu.util.IllformedLocaleException;
import ohos.global.icu.util.Output;
import ohos.global.icu.util.ULocale;

/**
 * Checks the fields of a {@link ULocale} (language, script, region, variants and the
 * {@code -t-}/{@code -u-} extensions) against {@link ValidIdentifiers}, accepting only codes
 * whose data subtype is in the set supplied at construction time.
 *
 * @author markdavis
 * @hide exposed on OHOS
 */
public class LocaleValidityChecker {
    /** Private copy of the accepted data subtypes; always an {@link EnumSet}. */
    private final Set<Datasubtype> datasubtypes;
    /** True when {@link Datasubtype#deprecated} is among the accepted subtypes. */
    private final boolean allowsDeprecated;

    /**
     * Out-parameter describing the first failure found by a validity check.
     * Both fields are {@code null} when no failure has been recorded.
     *
     * @hide exposed on OHOS
     */
    public static class Where {
        /** The kind of field that failed, or {@code null} if nothing failed. */
        public Datatype fieldFailure;
        /** The offending code (or a message), or {@code null} if nothing failed. */
        public String codeFailure;

        /**
         * Records a failure (or clears it when both arguments are {@code null}).
         *
         * @param datatype the kind of field that failed
         * @param code the offending code
         * @return always {@code false}, so callers can write {@code return where.set(...)}
         */
        public boolean set(Datatype datatype, String code) {
            fieldFailure = datatype;
            codeFailure = code;
            return false;
        }

        /**
         * @return {@code "OK"} when no failure is recorded, otherwise
         *         <code>{fieldFailure, codeFailure}</code>
         */
        @Override
        public String toString() {
            return fieldFailure == null ? "OK" : "{" + fieldFailure + ", " + codeFailure + "}";
        }
    }

    /**
     * Creates a checker that accepts the given data subtypes.
     *
     * @param datasubtypes the accepted subtypes; copied, may be empty, must not be {@code null}
     */
    public LocaleValidityChecker(Set<Datasubtype> datasubtypes) {
        // EnumSet.copyOf(Collection) throws IllegalArgumentException for an empty collection
        // that is not itself an EnumSet, so build the copy from an empty EnumSet instead.
        this.datasubtypes = EnumSet.noneOf(Datasubtype.class);
        this.datasubtypes.addAll(datasubtypes);
        allowsDeprecated = this.datasubtypes.contains(Datasubtype.deprecated);
    }

    /**
     * Creates a checker that accepts the given data subtypes.
     *
     * @param datasubtypes the accepted subtypes; may be empty
     */
    public LocaleValidityChecker(Datasubtype... datasubtypes) {
        // Same empty-input concern as the Set constructor: an empty varargs list would make
        // EnumSet.copyOf(Arrays.asList(...)) throw IllegalArgumentException.
        this.datasubtypes = EnumSet.noneOf(Datasubtype.class);
        this.datasubtypes.addAll(Arrays.asList(datasubtypes));
        allowsDeprecated = this.datasubtypes.contains(Datasubtype.deprecated);
    }

    /**
     * @return a fresh copy of the accepted datasubtypes; modifying it does not affect this checker
     */
    public Set<Datasubtype> getDatasubtypes() {
        return EnumSet.copyOf(datasubtypes);
    }

    /** Matches a single subtag separator, either '-' or '_'. */
    static Pattern SEPARATOR = Pattern.compile("[-_]");

    @SuppressWarnings("unused")
    private static final Pattern VALID_X = Pattern.compile("[a-zA-Z0-9]{2,8}(-[a-zA-Z0-9]{2,8})*");

    /**
     * Checks every field of {@code locale}.
     *
     * <p>{@code where} is cleared first and, on failure, set to the first offending field and
     * code. A language of {@code "x"} that fails the language check is accepted as valid
     * (well-formed implies valid). When the {@code x} extension key is reached while iterating
     * the extension keys, the method returns {@code true} immediately without examining any
     * keys that follow it in iteration order.
     *
     * @param locale the locale to check
     * @param where receives the failure location; must not be {@code null}
     * @return {@code true} if the locale is valid
     */
    public boolean isValid(ULocale locale, Where where) {
        where.set(null, null);
        final String language = locale.getLanguage();
        final String script = locale.getScript();
        final String region = locale.getCountry();
        final String variantString = locale.getVariant();
        final Set<Character> extensionKeys = locale.getExtensionKeys();

        if (!isValid(Datatype.language, language, where)) {
            // special case x
            if (language.equals("x")) {
                where.set(null, null); // for x, well-formed == valid
                return true;
            }
            return false;
        }
        if (!isValid(Datatype.script, script, where)) {
            return false;
        }
        if (!isValid(Datatype.region, region, where)) {
            return false;
        }
        if (!variantString.isEmpty()) {
            for (String variant : SEPARATOR.split(variantString)) {
                if (!isValid(Datatype.variant, variant, where)) {
                    return false;
                }
            }
        }
        for (Character c : extensionKeys) {
            try {
                // valueOf throws IllegalArgumentException for extension letters that have
                // no Datatype constant; that is reported as Datatype.illegal below.
                Datatype datatype = Datatype.valueOf(String.valueOf(c));
                switch (datatype) {
                case x:
                    return true; // if it is syntactic (checked by ULocale) it is valid
                case t:
                case u:
                    if (!isValidU(locale, datatype, locale.getExtension(c), where)) {
                        return false;
                    }
                    break;
                default:
                    break;
                }
            } catch (Exception e) {
                // Deliberately broad: besides the valueOf failure this also absorbs runtime
                // exceptions escaping isValidU (e.g. a value subtag seen before any key).
                return where.set(Datatype.illegal, String.valueOf(c));
            }
        }
        return true;
    }

    // TODO combine this with the KeyTypeData.SpecialType, and get it from the type, not the key
    /** Keys whose values are validated by dedicated logic instead of a plain type lookup. */
    enum SpecialCase {
        normal, anything, reorder, codepoints, subdivision, rgKey;

        /**
         * @param key a BCP 47 extension key; must not be {@code null}
         * @return the special handling for {@code key}, or {@link #normal} if there is none
         */
        static SpecialCase get(String key) {
            if (key.equals("kr")) {
                return reorder;
            } else if (key.equals("vt")) {
                return codepoints;
            } else if (key.equals("sd")) {
                return subdivision;
            } else if (key.equals("rg")) {
                return rgKey;
            } else if (key.equals("x0")) {
                return anything;
            } else {
                return normal;
            }
        }
    }

    /**
     * Validates the contents of a {@code -t-} or {@code -u-} extension.
     *
     * <p>The extension string is split on {@link #SEPARATOR}. A two-character subtag starts a
     * new key (for {@code -t-}, only when its second character is {@code <= '9'}); other
     * subtags are values of the current key. For {@code -t-}, the subtags before the first key
     * are collected and validated as a locale of their own.
     *
     * <p>Note: if a value subtag appears before any key, {@code valueType} is still
     * {@code null} and the {@code switch} throws {@link NullPointerException}; the caller
     * {@link #isValid(ULocale, Where)} catches it and reports {@code Datatype.illegal}.
     *
     * @param locale the locale that carries the extension (used for subdivision consistency)
     * @param datatype {@code Datatype.t} or {@code Datatype.u}
     * @param extensionString the extension value, without the leading singleton
     * @param where receives the failure location
     * @return {@code true} if every key and value is valid
     */
    private boolean isValidU(ULocale locale, Datatype datatype, String extensionString, Where where) {
        String key = "";
        int typeCount = 0;
        ValueType valueType = null;
        SpecialCase specialCase = null;
        StringBuilder prefix = new StringBuilder();
        Set<String> seen = new HashSet<String>();

        // Non-null only while collecting the leading locale part of a -t- extension.
        StringBuilder tBuffer = datatype == Datatype.t ? new StringBuilder() : null;

        // TODO: is empty -u- valid?

        for (String subtag : SEPARATOR.split(extensionString)) {
            if (subtag.length() == 2
                    && (tBuffer == null || subtag.charAt(1) <= '9')) {
                // if we have accumulated a t buffer, check that first
                if (tBuffer != null) {
                    // Check t buffer. Empty after 't' is ok.
                    if (tBuffer.length() != 0 && !isValidLocale(tBuffer.toString(), where)) {
                        return false;
                    }
                    tBuffer = null;
                }
                key = KeyTypeData.toBcpKey(subtag);
                if (key == null) {
                    return where.set(datatype, subtag);
                }
                if (!allowsDeprecated && KeyTypeData.isDeprecated(key)) {
                    return where.set(datatype, key);
                }
                valueType = KeyTypeData.getValueType(key);
                specialCase = SpecialCase.get(key);
                typeCount = 0;
            } else if (tBuffer != null) {
                if (tBuffer.length() != 0) {
                    tBuffer.append('-');
                }
                tBuffer.append(subtag);
            } else {
                ++typeCount;
                switch (valueType) {
                case single:
                    // at most one value subtag is allowed for this key
                    if (typeCount > 1) {
                        return where.set(datatype, key + "-" + subtag);
                    }
                    break;
                case incremental:
                    // each further subtag is checked as the cumulative "a-b-c" value
                    if (typeCount == 1) {
                        prefix.setLength(0);
                        prefix.append(subtag);
                    } else {
                        prefix.append('-').append(subtag);
                        subtag = prefix.toString();
                    }
                    break;
                case multiple:
                    // start duplicate tracking afresh for each key
                    if (typeCount == 1) {
                        seen.clear();
                    }
                    break;
                default:
                    break;
                }
                switch (specialCase) {
                case anything:
                    continue;
                case codepoints:
                    // value must be a hexadecimal number no greater than U+10FFFF
                    try {
                        if (Integer.parseInt(subtag, 16) > 0x10FFFF) {
                            return where.set(datatype, key + "-" + subtag);
                        }
                    } catch (NumberFormatException e) {
                        return where.set(datatype, key + "-" + subtag);
                    }
                    continue;
                case reorder:
                    // "zzzz" is recorded as "others" so the two count as duplicates of each other
                    boolean newlyAdded = seen.add(subtag.equals("zzzz") ? "others" : subtag);
                    if (!newlyAdded || !isScriptReorder(subtag)) {
                        return where.set(datatype, key + "-" + subtag);
                    }
                    continue;
                case subdivision:
                    if (!isSubdivision(locale, subtag)) {
                        return where.set(datatype, key + "-" + subtag);
                    }
                    continue;
                case rgKey:
                    // value must be a region code followed by the literal suffix "zzzz"
                    if (subtag.length() < 6 || !subtag.endsWith("zzzz")) {
                        return where.set(datatype, subtag);
                    }
                    if (!isValid(Datatype.region, subtag.substring(0, subtag.length() - 4), where)) {
                        return false;
                    }
                    continue;
                default:
                    break;
                }

                // en-u-sd-usca
                // en-US-u-sd-usca
                Output<Boolean> isKnownKey = new Output<Boolean>();
                Output<Boolean> isSpecialType = new Output<Boolean>();
                String type = KeyTypeData.toBcpType(key, subtag, isKnownKey, isSpecialType);
                if (type == null) {
                    return where.set(datatype, key + "-" + subtag);
                }
                if (!allowsDeprecated && KeyTypeData.isDeprecated(key, subtag)) {
                    return where.set(datatype, key + "-" + subtag);
                }
            }
        }
        // Check t buffer. Empty after 't' is ok.
        if (tBuffer != null && tBuffer.length() != 0 && !isValidLocale(tBuffer.toString(), where)) {
            return false;
        }
        return true;
    }

    /**
     * Checks a {@code -u-sd-} value: it must be a valid subdivision code and its region prefix
     * must match the locale's region (case-insensitively). If the locale has no region, the
     * region produced by {@link ULocale#addLikelySubtags(ULocale)} is used instead.
     *
     * @param locale the locale carrying the subdivision
     * @param subtag the subdivision value, region prefix followed by the subdivision part
     * @return {@code true} if the subdivision is valid and consistent with the locale
     */
    private boolean isSubdivision(ULocale locale, String subtag) {
        // First check if the subtag is valid
        if (subtag.length() < 3) {
            return false;
        }
        // The region prefix is 3 characters when it starts with a character <= '9', else 2.
        String region = subtag.substring(0, subtag.charAt(0) <= '9' ? 3 : 2);
        String subdivision = subtag.substring(region.length());
        if (ValidIdentifiers.isValid(Datatype.subdivision, datasubtypes, region, subdivision) == null) {
            return false;
        }
        // Then check for consistency with the locale's region
        String localeRegion = locale.getCountry();
        if (localeRegion.isEmpty()) {
            ULocale max = ULocale.addLikelySubtags(locale);
            localeRegion = max.getCountry();
        }
        if (!region.equalsIgnoreCase(localeRegion)) {
            return false;
        }
        return true;
    }

    static final Set<String> REORDERING_INCLUDE = new HashSet<String>(Arrays.asList("space", "punct", "symbol", "currency", "digit", "others", "zzzz"));
    static final Set<String> REORDERING_EXCLUDE = new HashSet<String>(Arrays.asList("zinh", "zyyy"));
    static final Set<Datasubtype> REGULAR_ONLY = EnumSet.of(Datasubtype.regular);

    /**
     * Checks a {@code -u-kr-} (collation reordering) value.
     *
     * <p>Accepted values are:
     * <ul>
     * <li>space, punct, symbol, currency, digit - core groups of characters below 'a';</li>
     * <li>others - where all codes not explicitly mentioned should be ordered. The script code
     *     Zzzz (Unknown Script) is a synonym for others;</li>
     * <li>any script code with subtype {@code regular} except Common (Zyyy) and Inherited
     *     (Zinh).</li>
     * </ul>
     * Some pairs of scripts sort primary-equal and always reorder together. For example,
     * Katakana characters are always reordered with Hiragana.
     *
     * @param subtag the reordering code; compared after ASCII lower-casing
     * @return {@code true} if {@code subtag} is an acceptable reordering code
     */
    private boolean isScriptReorder(String subtag) {
        subtag = AsciiUtil.toLowerString(subtag);
        if (REORDERING_INCLUDE.contains(subtag)) {
            return true;
        } else if (REORDERING_EXCLUDE.contains(subtag)) {
            return false;
        }
        return ValidIdentifiers.isValid(Datatype.script, REGULAR_ONLY, subtag) != null;
    }

    /**
     * Validates the locale part of a {@code -t-} extension by parsing it as a language tag and
     * running {@link #isValid(ULocale, Where)} on the result.
     *
     * @param extensionString the language tag collected from the {@code -t-} extension
     * @param where receives the failure location; on a parse error it is set to
     *        {@code Datatype.t} and the first subtag at the reported error position
     * @return {@code true} if the tag parses and the resulting locale is valid
     */
    private boolean isValidLocale(String extensionString, Where where) {
        try {
            ULocale locale = new ULocale.Builder().setLanguageTag(extensionString).build();
            return isValid(locale, where);
        } catch (IllformedLocaleException e) {
            // getErrorIndex() is negative when the position is unknown or not applicable;
            // fall back to the start of the string rather than letting substring() throw.
            int startIndex = e.getErrorIndex();
            if (startIndex < 0 || startIndex > extensionString.length()) {
                startIndex = 0;
            }
            String remainder = extensionString.substring(startIndex);
            String[] list = SEPARATOR.split(remainder);
            // split() drops trailing empty strings, so a remainder made only of separators
            // yields an empty array.
            return where.set(Datatype.t, list.length == 0 ? remainder : list[0]);
        } catch (Exception e) {
            return where.set(Datatype.t, e.getMessage());
        }
    }

    /**
     * Checks a single code of the given type against {@link ValidIdentifiers}, using the
     * subtypes accepted by this checker. An empty code is always valid.
     *
     * @param datatype the kind of code being checked
     * @param code the code to check; must not be {@code null}
     * @param where receives the failure location; may be {@code null}
     * @return {@code true} if the code is empty or valid
     */
    private boolean isValid(Datatype datatype, String code, Where where) {
        if (code.isEmpty()) {
            return true;
        }

        // Note:
        // BCP 47 -u- locale extension '-u-va-posix' is mapped to variant 'posix' automatically.
        // For example, ULocale.forLanguageTag("en-u-va-posix").getVariant() returns "posix".
        // This is only the exceptional case when -u- locale extension is mapped to a subtag type
        // other than keyword.
        //
        // The locale validity data is based on IANA language subtag registry data and "posix"
        // is not a valid variant. So we need to handle this specific case here. There are no
        // other exceptions.
        if (datatype == Datatype.variant && "posix".equalsIgnoreCase(code)) {
            return true;
        }

        if (ValidIdentifiers.isValid(datatype, datasubtypes, code) != null) {
            return true;
        }
        return where == null ? false : where.set(datatype, code);
    }
}