// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2009-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl.locale;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Mutable accumulator for the pieces of a locale: language, script, region,
 * variant, BCP 47 extensions, and Unicode locale attributes/keywords.
 *
 * <p>Setters validate their input and throw {@link LocaleSyntaxException} on
 * ill-formed values; every setter returns {@code this} so calls can be chained.
 * The accumulated state is read back through {@link #getBaseLocale()} and
 * {@link #getLocaleExtensions()}.
 */
public final class InternalLocaleBuilder {

    private static final boolean JDKIMPL = false;

    private String _language = "";
    private String _script = "";
    private String _region = "";
    private String _variant = "";

    private static final CaseInsensitiveChar PRIVUSE_KEY = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE.charAt(0));

    // All three containers are created lazily; null means "nothing set yet".
    private HashMap<CaseInsensitiveChar, String> _extensions;
    private HashSet<CaseInsensitiveString> _uattributes;
    private HashMap<CaseInsensitiveString, String> _ukeywords;

    /** Creates a builder with all fields empty. */
    public InternalLocaleBuilder() {
    }

    /**
     * Sets the language subtag.
     *
     * @param language the language; {@code null} or empty clears the field
     * @return this builder
     * @throws LocaleSyntaxException if {@code language} is non-empty and rejected
     *         by {@code LanguageTag.isLanguage}
     */
    public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
        if (language == null || language.length() == 0) {
            _language = "";
        } else {
            if (!LanguageTag.isLanguage(language)) {
                throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
            }
            _language = language;
        }
        return this;
    }

    /**
     * Sets the script subtag.
     *
     * @param script the script; {@code null} or empty clears the field
     * @return this builder
     * @throws LocaleSyntaxException if {@code script} is non-empty and rejected
     *         by {@code LanguageTag.isScript}
     */
    public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
        if (script == null || script.length() == 0) {
            _script = "";
        } else {
            if (!LanguageTag.isScript(script)) {
                throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
            }
            _script = script;
        }
        return this;
    }

    /**
     * Sets the region subtag.
     *
     * @param region the region; {@code null} or empty clears the field
     * @return this builder
     * @throws LocaleSyntaxException if {@code region} is non-empty and rejected
     *         by {@code LanguageTag.isRegion}
     */
    public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
        if (region == null || region.length() == 0) {
            _region = "";
        } else {
            if (!LanguageTag.isRegion(region)) {
                throw new LocaleSyntaxException("Ill-formed region: " + region, 0);
            }
            _region = region;
        }
        return this;
    }

    /**
     * Sets the variant. Separators are rewritten from {@code LanguageTag.SEP} to
     * {@code BaseLocale.SEP} before each variant subtag is validated.
     *
     * @param variant the variant; {@code null} or empty clears the field
     * @return this builder
     * @throws LocaleSyntaxException if any variant subtag is ill-formed; the
     *         error index is the start of the offending subtag
     */
    public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
        if (variant == null || variant.length() == 0) {
            _variant = "";
        } else {
            // normalize separators to "_"
            String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP);
            int errIdx = checkVariants(var, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
            _variant = var;
        }
        return this;
    }

    /**
     * Adds a Unicode locale attribute. Attributes are stored case-insensitively,
     * so adding the same attribute in a different case has no further effect.
     *
     * @param attribute the attribute to add
     * @return this builder
     * @throws LocaleSyntaxException if {@code attribute} is {@code null} or
     *         rejected by {@code UnicodeLocaleExtension.isAttribute}
     */
    public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        // Use case insensitive string to prevent duplication
        if (_uattributes == null) {
            _uattributes = new HashSet<CaseInsensitiveString>(4);
        }
        _uattributes.add(new CaseInsensitiveString(attribute));
        return this;
    }

    /**
     * Removes a Unicode locale attribute (case-insensitive match). Removing an
     * attribute that is not present is a no-op.
     *
     * @param attribute the attribute to remove
     * @return this builder
     * @throws LocaleSyntaxException if {@code attribute} is {@code null} or
     *         rejected by {@code UnicodeLocaleExtension.isAttribute}
     */
    public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        if (_uattributes != null) {
            _uattributes.remove(new CaseInsensitiveString(attribute));
        }
        return this;
    }

    /**
     * Sets or removes a Unicode locale keyword.
     *
     * @param key  the keyword key
     * @param type the keyword type; {@code null} removes the key, an empty
     *             string is stored without validation
     * @return this builder
     * @throws LocaleSyntaxException if {@code key} is rejected by
     *         {@code UnicodeLocaleExtension.isKey}, or a subtag of a non-empty
     *         {@code type} is rejected by {@code UnicodeLocaleExtension.isTypeSubtag}
     */
    public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException {
        if (!UnicodeLocaleExtension.isKey(key)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key);
        }

        CaseInsensitiveString cikey = new CaseInsensitiveString(key);
        if (type == null) {
            if (_ukeywords != null) {
                // a null type means "remove the key"
                _ukeywords.remove(cikey);
            }
        } else {
            if (type.length() != 0) {
                // normalize separator to "-"
                String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
                // validate
                StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP);
                while (!itr.isDone()) {
                    String s = itr.current();
                    if (!UnicodeLocaleExtension.isTypeSubtag(s)) {
                        throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: " + type, itr.currentStart());
                    }
                    itr.next();
                }
            }
            if (_ukeywords == null) {
                _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
            }
            // NOTE(review): the value stored is the caller's original `type`, not the
            // separator-normalized `tp` that was validated above - confirm this is intended.
            _ukeywords.put(cikey, type);
        }
        return this;
    }

    /**
     * Sets or removes a single extension (or the private use sequence).
     *
     * <p>A {@code null} or empty {@code value} removes the extension; for the
     * Unicode locale singleton this clears all attributes and keywords. Otherwise
     * the value is separator-normalized, validated subtag by subtag, and stored;
     * for the Unicode locale singleton it is parsed into attributes/keywords.
     *
     * @param singleton the extension singleton or private use prefix character
     * @param value     the extension value, or {@code null}/empty to remove
     * @return this builder
     * @throws LocaleSyntaxException if the key or any value subtag is ill-formed
     */
    public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
        // validate key
        boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton);
        if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) {
            throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
        }

        boolean remove = (value == null || value.length() == 0);
        CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);

        if (remove) {
            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                // clear entire Unicode locale extension
                if (_uattributes != null) {
                    _uattributes.clear();
                }
                if (_ukeywords != null) {
                    _ukeywords.clear();
                }
            } else if (_extensions != null) {
                _extensions.remove(key);
            }
        } else {
            // validate value
            String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
            StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
            while (!itr.isDone()) {
                String s = itr.current();
                boolean validSubtag;
                if (isBcpPrivateuse) {
                    validSubtag = LanguageTag.isPrivateuseSubtag(s);
                } else {
                    validSubtag = LanguageTag.isExtensionSubtag(s);
                }
                if (!validSubtag) {
                    throw new LocaleSyntaxException("Ill-formed extension value: " + s, itr.currentStart());
                }
                itr.next();
            }

            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                setUnicodeLocaleExtension(val);
            } else {
                if (_extensions == null) {
                    _extensions = new HashMap<CaseInsensitiveChar, String>(4);
                }
                _extensions.put(key, val);
            }
        }
        return this;
    }

    /**
     * Sets extension/private use subtags from a single string representation,
     * replacing any extensions previously held by this builder.
     *
     * @param subtags the extension and private use subtags; {@code null} or
     *                empty clears all extensions
     * @return this builder
     * @throws LocaleSyntaxException if a singleton or private use prefix is not
     *         followed by at least one valid subtag, or if unparsed text remains
     */
    public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException {
        if (subtags == null || subtags.length() == 0) {
            clearExtensions();
            return this;
        }
        subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        List<String> extensions = null;
        String privateuse = null;

        // End offset of the last subtag successfully consumed so far.
        int parsed = 0;
        int start;

        // Make a list of extension subtags
        while (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isExtensionSingleton(s)) {
                start = itr.currentStart();
                String singleton = s;
                StringBuilder sb = new StringBuilder(singleton);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (LanguageTag.isExtensionSubtag(s)) {
                        sb.append(LanguageTag.SEP).append(s);
                        parsed = itr.currentEnd();
                    } else {
                        break;
                    }
                    itr.next();
                }

                // A singleton with no following subtag leaves parsed at or before
                // start. "<=" (not "<") is required so that a bare singleton at
                // offset 0 is rejected here instead of reaching substring(2) later.
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'", start);
                }

                if (extensions == null) {
                    extensions = new ArrayList<String>(4);
                }
                extensions.add(sb.toString());
            } else {
                break;
            }
        }
        if (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isPrivateusePrefix(s)) {
                start = itr.currentStart();
                StringBuilder sb = new StringBuilder(s);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (!LanguageTag.isPrivateuseSubtag(s)) {
                        break;
                    }
                    sb.append(LanguageTag.SEP).append(s);
                    parsed = itr.currentEnd();

                    itr.next();
                }
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete privateuse:" + subtags.substring(start), start);
                } else {
                    privateuse = sb.toString();
                }
            }
        }

        if (!itr.isDone()) {
            throw new LocaleSyntaxException("Ill-formed extension subtags:" + subtags.substring(itr.currentStart()), itr.currentStart());
        }

        return setExtensions(extensions, privateuse);
    }

    /*
     * Set a list of BCP47 extensions and private use subtags.
     * BCP47 extensions are already validated and well-formed, but may contain
     * duplicates; only the first extension for a given singleton is kept.
     */
    private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) {
        clearExtensions();

        if (bcpExtensions != null && bcpExtensions.size() > 0) {
            HashSet<CaseInsensitiveChar> processedExtensions = new HashSet<CaseInsensitiveChar>(bcpExtensions.size());
            for (String bcpExt : bcpExtensions) {
                CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt.charAt(0));
                // ignore duplicates
                if (!processedExtensions.contains(key)) {
                    // each extension string contains singleton, e.g. "a-abc-def"
                    if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                        setUnicodeLocaleExtension(bcpExt.substring(2));
                    } else {
                        if (_extensions == null) {
                            _extensions = new HashMap<CaseInsensitiveChar, String>(4);
                        }
                        _extensions.put(key, bcpExt.substring(2));
                    }
                }
                // Remember the singleton so a later duplicate is actually skipped.
                processedExtensions.add(key);
            }
        }
        if (privateuse != null && privateuse.length() > 0) {
            // privateuse string contains prefix, e.g. "x-abc-def"
            if (_extensions == null) {
                _extensions = new HashMap<CaseInsensitiveChar, String>(1);
            }
            _extensions.put(new CaseInsensitiveChar(privateuse.charAt(0)), privateuse.substring(2));
        }

        return this;
    }

    /**
     * Resets this builder's state from the given language tag.
     *
     * <p>The first extlang, when present, is used as the language; otherwise the
     * tag's language is used unless it equals {@code LanguageTag.UNDETERMINED}.
     * Variants are sorted and joined with {@code BaseLocale.SEP}.
     *
     * @param langtag the parsed language tag
     * @return this builder
     */
    public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) {
        clear();
        if (langtag.getExtlangs().size() > 0) {
            _language = langtag.getExtlangs().get(0);
        } else {
            String language = langtag.getLanguage();
            if (!language.equals(LanguageTag.UNDETERMINED)) {
                _language = language;
            }
        }
        _script = langtag.getScript();
        _region = langtag.getRegion();

        ArrayList<String> bcpVariants = new ArrayList<String>(langtag.getVariants());
        Collections.sort(bcpVariants);
        if (bcpVariants.size() > 0) {
            StringBuilder var = new StringBuilder(bcpVariants.get(0));
            for (int i = 1; i < bcpVariants.size(); i++) {
                var.append(BaseLocale.SEP).append(bcpVariants.get(i));
            }
            _variant = var.toString();
        }

        setExtensions(langtag.getExtensions(), langtag.getPrivateuse());

        return this;
    }

    /**
     * Replaces this builder's state with the given base locale and extensions.
     * All base locale fields are validated before any internal field is
     * modified, so a failed call leaves the builder unchanged.
     *
     * @param base       the base locale supplying language/script/region/variant
     * @param extensions the extensions to copy; may be {@code null}
     * @return this builder
     * @throws LocaleSyntaxException if any non-empty base locale field is ill-formed
     */
    public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions) throws LocaleSyntaxException {
        String language = base.getLanguage();
        String script = base.getScript();
        String region = base.getRegion();
        String variant = base.getVariant();

        if (JDKIMPL) {
            // Special backward compatibility support

            // Exception 1 - ja_JP_JP
            if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) {
                // When locale ja_JP_JP is created, ca-japanese is always there.
                // The builder ignores the variant "JP"
                assert("japanese".equals(extensions.getUnicodeLocaleType("ca")));
                variant = "";
            }
            // Exception 2 - th_TH_TH
            else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) {
                // When locale th_TH_TH is created, nu-thai is always there.
                // The builder ignores the variant "TH"
                assert("thai".equals(extensions.getUnicodeLocaleType("nu")));
                variant = "";
            }
            // Exception 3 - no_NO_NY
            else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) {
                // no_NO_NY is a valid locale and used by Java 6 or older versions.
                // The builder ignores the variant "NY" and changes the language to "nn".
                language = "nn";
                variant = "";
            }
        }

        // Validate base locale fields before updating internal state.
        // LocaleExtensions always store validated/canonicalized values,
        // so no checks are necessary.
        if (language.length() > 0 && !LanguageTag.isLanguage(language)) {
            throw new LocaleSyntaxException("Ill-formed language: " + language);
        }

        if (script.length() > 0 && !LanguageTag.isScript(script)) {
            throw new LocaleSyntaxException("Ill-formed script: " + script);
        }

        if (region.length() > 0 && !LanguageTag.isRegion(region)) {
            throw new LocaleSyntaxException("Ill-formed region: " + region);
        }

        if (variant.length() > 0) {
            int errIdx = checkVariants(variant, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
        }

        // The input locale is validated at this point.
        // Now, updating builder's internal fields.
        _language = language;
        _script = script;
        _region = region;
        _variant = variant;
        clearExtensions();

        Set<Character> extKeys = (extensions == null) ? null : extensions.getKeys();
        if (extKeys != null) {
            // map extensions back to builder's internal format
            for (Character key : extKeys) {
                Extension e = extensions.getExtension(key);
                if (e instanceof UnicodeLocaleExtension) {
                    UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e;
                    for (String uatr : ue.getUnicodeLocaleAttributes()) {
                        if (_uattributes == null) {
                            _uattributes = new HashSet<CaseInsensitiveString>(4);
                        }
                        _uattributes.add(new CaseInsensitiveString(uatr));
                    }
                    for (String ukey : ue.getUnicodeLocaleKeys()) {
                        if (_ukeywords == null) {
                            _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
                        }
                        _ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey));
                    }
                } else {
                    if (_extensions == null) {
                        _extensions = new HashMap<CaseInsensitiveChar, String>(4);
                    }
                    _extensions.put(new CaseInsensitiveChar(key.charValue()), e.getValue());
                }
            }
        }
        return this;
    }

    /**
     * Resets language, script, region and variant to empty and clears all
     * extensions.
     *
     * @return this builder
     */
    public InternalLocaleBuilder clear() {
        _language = "";
        _script = "";
        _region = "";
        _variant = "";
        clearExtensions();
        return this;
    }

    /**
     * Clears extensions, Unicode locale attributes and Unicode locale keywords,
     * leaving the base locale fields untouched.
     *
     * @return this builder
     */
    public InternalLocaleBuilder clearExtensions() {
        if (_extensions != null) {
            _extensions.clear();
        }
        if (_uattributes != null) {
            _uattributes.clear();
        }
        if (_ukeywords != null) {
            _ukeywords.clear();
        }
        return this;
    }

    /**
     * Builds the base locale from the current state.
     *
     * <p>If the private use extension contains the
     * {@code LanguageTag.PRIVUSE_VARIANT_PREFIX} subtag followed by at least one
     * more subtag, everything after that prefix is appended to the variant
     * (with separators rewritten to {@code BaseLocale.SEP}).
     *
     * @return the result of {@code BaseLocale.getInstance} for the current fields
     */
    public BaseLocale getBaseLocale() {
        String language = _language;
        String script = _script;
        String region = _region;
        String variant = _variant;

        // Special private use subtag sequence identified by "lvariant" will be
        // interpreted as Java variant.
        if (_extensions != null) {
            String privuse = _extensions.get(PRIVUSE_KEY);
            if (privuse != null) {
                StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP);
                boolean sawPrefix = false;
                int privVarStart = -1;
                while (!itr.isDone()) {
                    if (sawPrefix) {
                        // first subtag after the prefix: the variant starts here
                        privVarStart = itr.currentStart();
                        break;
                    }
                    if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                        sawPrefix = true;
                    }
                    itr.next();
                }
                if (privVarStart != -1) {
                    StringBuilder sb = new StringBuilder(variant);
                    if (sb.length() != 0) {
                        sb.append(BaseLocale.SEP);
                    }
                    sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, BaseLocale.SEP));
                    variant = sb.toString();
                }
            }
        }

        return BaseLocale.getInstance(language, script, region, variant);
    }

    /**
     * Builds the locale extensions from the current state.
     *
     * @return {@code LocaleExtensions.EMPTY_EXTENSIONS} when no extension,
     *         attribute or keyword is set; otherwise a new {@code LocaleExtensions}
     *         constructed from the builder's current containers
     */
    public LocaleExtensions getLocaleExtensions() {
        if ((_extensions == null || _extensions.size() == 0)
                && (_uattributes == null || _uattributes.size() == 0)
                && (_ukeywords == null || _ukeywords.size() == 0)) {
            return LocaleExtensions.EMPTY_EXTENSIONS;
        }

        return new LocaleExtensions(_extensions, _uattributes, _ukeywords);
    }

    /*
     * Remove special private use subtag sequence identified by "lvariant"
     * and return the rest. Only used by LocaleExtensions.
     *
     * Returns the input unchanged when the prefix is absent or is the last
     * subtag, null when the prefix is the first subtag, and otherwise the text
     * preceding the separator in front of the prefix.
     */
    static String removePrivateuseVariant(String privuseVal) {
        StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP);

        // Note: privateuse value "abc-lvariant" is unchanged
        // because no subtags after "lvariant".

        int prefixStart = -1;
        boolean sawPrivuseVar = false;
        while (!itr.isDone()) {
            if (prefixStart != -1) {
                // There is at least one subtag after the prefix.
                sawPrivuseVar = true;
                break;
            }
            if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                prefixStart = itr.currentStart();
            }
            itr.next();
        }
        if (!sawPrivuseVar) {
            return privuseVal;
        }

        assert(prefixStart == 0 || prefixStart > 1);
        // prefixStart - 1 drops the separator that precedes the prefix.
        return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart - 1);
    }

    /*
     * Check if the given variant subtags separated by the given
     * separator(s) are valid. Returns the start index of the first
     * ill-formed subtag, or -1 when all subtags are valid.
     */
    private int checkVariants(String variants, String sep) {
        StringTokenIterator itr = new StringTokenIterator(variants, sep);
        while (!itr.isDone()) {
            String s = itr.current();
            if (!LanguageTag.isVariant(s)) {
                return itr.currentStart();
            }
            itr.next();
        }
        return -1;
    }

    /*
     * Private methods parsing Unicode Locale Extension subtags.
     * Duplicated attributes/keywords will be ignored.
     * The input must be a valid extension subtags (excluding singleton).
     */
    private void setUnicodeLocaleExtension(String subtags) {
        // wipe out existing attributes/keywords
        if (_uattributes != null) {
            _uattributes.clear();
        }
        if (_ukeywords != null) {
            _ukeywords.clear();
        }

        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        // parse attributes
        while (!itr.isDone()) {
            if (!UnicodeLocaleExtension.isAttribute(itr.current())) {
                break;
            }
            if (_uattributes == null) {
                _uattributes = new HashSet<CaseInsensitiveString>(4);
            }
            _uattributes.add(new CaseInsensitiveString(itr.current()));
            itr.next();
        }

        // parse keywords
        CaseInsensitiveString key = null;   // keyword currently being collected, or null
        String type;
        int typeStart = -1;                 // offsets of the current keyword's type in subtags
        int typeEnd = -1;
        while (!itr.isDone()) {
            if (key != null) {
                if (UnicodeLocaleExtension.isKey(itr.current())) {
                    // next keyword - emit previous one
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (_ukeywords == null) {
                        _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
                    }
                    _ukeywords.put(key, type);

                    // reset keyword info
                    CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
                    key = _ukeywords.containsKey(tmpKey) ? null : tmpKey;
                    typeStart = typeEnd = -1;
                } else {
                    if (typeStart == -1) {
                        typeStart = itr.currentStart();
                    }
                    typeEnd = itr.currentEnd();
                }
            } else if (UnicodeLocaleExtension.isKey(itr.current())) {
                // 1. first keyword or
                // 2. next keyword, but previous one was duplicate
                key = new CaseInsensitiveString(itr.current());
                if (_ukeywords != null && _ukeywords.containsKey(key)) {
                    // duplicate
                    key = null;
                }
            }

            if (!itr.hasNext()) {
                if (key != null) {
                    // last keyword
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (_ukeywords == null) {
                        _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
                    }
                    _ukeywords.put(key, type);
                }
                break;
            }

            itr.next();
        }
    }

    /**
     * String wrapper whose {@code equals}/{@code hashCode} compare through
     * {@code AsciiUtil} case folding, for use as a hash key. {@link #value()}
     * returns the string exactly as supplied.
     */
    static class CaseInsensitiveString {
        private final String _s;

        CaseInsensitiveString(String s) {
            _s = s;
        }

        public String value() {
            return _s;
        }

        @Override
        public int hashCode() {
            return AsciiUtil.toLowerString(_s).hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveString)) {
                return false;
            }
            return AsciiUtil.caseIgnoreMatch(_s, ((CaseInsensitiveString)obj).value());
        }
    }

    /**
     * Char wrapper whose {@code equals}/{@code hashCode} compare through
     * {@code AsciiUtil.toLower}, for use as a hash key. {@link #value()} returns
     * the char exactly as supplied.
     */
    static class CaseInsensitiveChar {
        private final char _c;

        CaseInsensitiveChar(char c) {
            _c = c;
        }

        public char value() {
            return _c;
        }

        @Override
        public int hashCode() {
            return AsciiUtil.toLower(_c);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveChar)) {
                return false;
            }
            // Lower-case BOTH sides: comparing the raw _c made equals asymmetric and
            // made two distinct wrappers of the same uppercase char unequal, which
            // is inconsistent with hashCode().
            return AsciiUtil.toLower(_c) == AsciiUtil.toLower(((CaseInsensitiveChar)obj).value());
        }

    }
}