1 package org.unicode.cldr.util; 2 3 import com.google.common.base.Splitter; 4 import com.ibm.icu.impl.Relation; 5 import com.ibm.icu.impl.Row; 6 import com.ibm.icu.impl.UnicodeMap; 7 import com.ibm.icu.lang.UCharacter; 8 import com.ibm.icu.text.Collator; 9 import com.ibm.icu.text.Transform; 10 import com.ibm.icu.text.UnicodeSet; 11 import com.ibm.icu.util.ICUException; 12 import com.ibm.icu.util.Output; 13 import com.ibm.icu.util.ULocale; 14 import java.util.Arrays; 15 import java.util.Collections; 16 import java.util.EnumMap; 17 import java.util.HashMap; 18 import java.util.HashSet; 19 import java.util.Iterator; 20 import java.util.LinkedHashMap; 21 import java.util.List; 22 import java.util.Locale; 23 import java.util.Map; 24 import java.util.Map.Entry; 25 import java.util.Set; 26 import java.util.TreeMap; 27 import java.util.TreeSet; 28 import java.util.logging.Logger; 29 import java.util.regex.Matcher; 30 import java.util.regex.Pattern; 31 import org.unicode.cldr.draft.ScriptMetadata; 32 import org.unicode.cldr.draft.ScriptMetadata.Info; 33 import org.unicode.cldr.tool.LikelySubtags; 34 import org.unicode.cldr.util.RegexLookup.Finder; 35 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 36 import org.unicode.cldr.util.With.SimpleIterator; 37 import org.unicode.cldr.util.personname.PersonNameFormatter; 38 39 /** 40 * Provides a mechanism for dividing up LDML paths into understandable categories, eg for the Survey 41 * tool. 42 */ 43 public class PathHeader implements Comparable<PathHeader> { 44 /** Link to a section. Commenting out the page switch for now. 
*/ 45 public static final String SECTION_LINK = "<a " + /* "target='CLDR_ST-SECTION' "+*/ "href='"; 46 47 static boolean UNIFORM_CONTINENTS = true; 48 static Factory factorySingleton = null; 49 50 static final boolean SKIP_ORIGINAL_PATH = true; 51 52 private static final Logger logger = Logger.getLogger(PathHeader.class.getName()); 53 54 static final Splitter HYPHEN_SPLITTER = Splitter.on('-'); 55 56 public enum Width { 57 FULL, 58 LONG, 59 WIDE, 60 SHORT, 61 NARROW; 62 getValue(String input)63 public static Width getValue(String input) { 64 try { 65 return Width.valueOf(input.toUpperCase(Locale.ENGLISH)); 66 } catch (RuntimeException e) { 67 e.printStackTrace(); 68 throw e; 69 } 70 } 71 72 @Override toString()73 public String toString() { 74 return name().toLowerCase(Locale.ENGLISH); 75 } 76 } 77 78 /** What status the survey tool should use. Can be overridden in Phase.getAction() */ 79 public enum SurveyToolStatus { 80 /** Never show. */ 81 DEPRECATED, 82 /** Hide. Can be overridden in Phase.getAction() */ 83 HIDE, 84 /** 85 * Don't allow Change box (except TC), instead show ticket. But allow votes. Can be 86 * overridden in Phase.getAction() 87 */ 88 READ_ONLY, 89 /** Allow change box and votes. Can be overridden in Phase.getAction() */ 90 READ_WRITE, 91 /** 92 * Changes are allowed as READ_WRITE, but field is always displayed as LTR, even in RTL 93 * locales (used for patterns). 94 */ 95 LTR_ALWAYS 96 } 97 98 private static final EnumNames<SectionId> SectionIdNames = new EnumNames<>(); 99 100 /** 101 * The Section for a path. Don't change these without committee buy-in. 
The 'name' may be 102 * 'Core_Data' and the toString is 'Core Data' toString gives the human name 103 */ 104 public enum SectionId { 105 Core_Data("Core Data"), 106 Locale_Display_Names("Locale Display Names"), 107 DateTime("Date & Time"), 108 Timezones, 109 Numbers, 110 Currencies, 111 Units, 112 Characters, 113 Misc("Miscellaneous"), 114 BCP47, 115 Supplemental, 116 Special; 117 SectionId(String... alternateNames)118 SectionId(String... alternateNames) { 119 SectionIdNames.add(this, alternateNames); 120 } 121 forString(String name)122 public static SectionId forString(String name) { 123 return SectionIdNames.forString(name); 124 } 125 126 @Override toString()127 public String toString() { 128 return SectionIdNames.toString(this); 129 } 130 } 131 132 private static final EnumNames<PageId> PageIdNames = new EnumNames<>(); 133 private static final Relation<SectionId, PageId> SectionIdToPageIds = 134 Relation.of(new TreeMap<>(), TreeSet.class); 135 136 private static class SubstringOrder implements Comparable<SubstringOrder> { 137 final String mainOrder; 138 final int order; 139 SubstringOrder(String source)140 public SubstringOrder(String source) { 141 int pos = source.lastIndexOf('-') + 1; 142 int ordering = COUNTS.indexOf(source.substring(pos)); 143 // account for digits, and "some" future proofing. 144 order = ordering < 0 ? source.charAt(pos) : 0x10000 + ordering; 145 mainOrder = source.substring(0, pos); 146 } 147 148 @Override 149 public String toString() { 150 return "{" + mainOrder + ", " + order + "}"; 151 } 152 153 @Override 154 public int compareTo(SubstringOrder other) { 155 int diff = alphabeticCompare(mainOrder, other.mainOrder); 156 if (diff != 0) { 157 return diff; 158 } 159 return order - other.order; 160 } 161 } 162 163 /** 164 * The Page for a path (within a Section). Don't change these without committee buy-in. 
the name 165 * is for example WAsia where toString gives Western Asia 166 */ 167 public enum PageId { 168 Alphabetic_Information(SectionId.Core_Data, "Alphabetic Information"), 169 Numbering_Systems(SectionId.Core_Data, "Numbering Systems"), 170 LinguisticElements(SectionId.Core_Data, "Linguistic Elements"), 171 172 Locale_Name_Patterns(SectionId.Locale_Display_Names, "Locale Name Patterns"), 173 Languages_A_D(SectionId.Locale_Display_Names, "Languages (A-D)"), 174 Languages_E_J(SectionId.Locale_Display_Names, "Languages (E-J)"), 175 Languages_K_N(SectionId.Locale_Display_Names, "Languages (K-N)"), 176 Languages_O_S(SectionId.Locale_Display_Names, "Languages (O-S)"), 177 Languages_T_Z(SectionId.Locale_Display_Names, "Languages (T-Z)"), 178 Scripts(SectionId.Locale_Display_Names), 179 Territories(SectionId.Locale_Display_Names, "Geographic Regions"), 180 T_NAmerica(SectionId.Locale_Display_Names, "Territories (North America)"), 181 T_SAmerica(SectionId.Locale_Display_Names, "Territories (South America)"), 182 T_Africa(SectionId.Locale_Display_Names, "Territories (Africa)"), 183 T_Europe(SectionId.Locale_Display_Names, "Territories (Europe)"), 184 T_Asia(SectionId.Locale_Display_Names, "Territories (Asia)"), 185 T_Oceania(SectionId.Locale_Display_Names, "Territories (Oceania)"), 186 Locale_Variants(SectionId.Locale_Display_Names, "Locale Variants"), 187 Keys(SectionId.Locale_Display_Names), 188 189 Fields(SectionId.DateTime), 190 Relative(SectionId.DateTime), 191 Gregorian(SectionId.DateTime), 192 ISO8601(SectionId.DateTime, "ISO 8601"), 193 Generic(SectionId.DateTime), 194 Buddhist(SectionId.DateTime), 195 Chinese(SectionId.DateTime), 196 Coptic(SectionId.DateTime), 197 Dangi(SectionId.DateTime), 198 Ethiopic(SectionId.DateTime), 199 Ethiopic_Amete_Alem(SectionId.DateTime, "Ethiopic-Amete-Alem"), 200 Hebrew(SectionId.DateTime), 201 Indian(SectionId.DateTime), 202 Islamic(SectionId.DateTime), 203 Japanese(SectionId.DateTime), 204 Persian(SectionId.DateTime), 205 
Minguo(SectionId.DateTime), 206 207 Timezone_Display_Patterns(SectionId.Timezones, "Timezone Display Patterns"), 208 NAmerica(SectionId.Timezones, "North America"), 209 SAmerica(SectionId.Timezones, "South America"), 210 Africa(SectionId.Timezones), 211 Europe(SectionId.Timezones), 212 Russia(SectionId.Timezones), 213 WAsia(SectionId.Timezones, "Western Asia"), 214 CAsia(SectionId.Timezones, "Central Asia"), 215 EAsia(SectionId.Timezones, "Eastern Asia"), 216 SAsia(SectionId.Timezones, "Southern Asia"), 217 SEAsia(SectionId.Timezones, "Southeast Asia"), 218 Australasia(SectionId.Timezones), 219 Antarctica(SectionId.Timezones), 220 Oceania(SectionId.Timezones), 221 UnknownT(SectionId.Timezones, "Unknown Region"), 222 Overrides(SectionId.Timezones), 223 224 Symbols(SectionId.Numbers), 225 Number_Formatting_Patterns(SectionId.Numbers, "Number Formatting Patterns"), 226 Compact_Decimal_Formatting(SectionId.Numbers, "Compact Decimal Formatting"), 227 Compact_Decimal_Formatting_Other( 228 SectionId.Numbers, "Compact Decimal Formatting (Other Numbering Systems)"), 229 230 Measurement_Systems(SectionId.Units, "Measurement Systems"), 231 Duration(SectionId.Units), 232 Graphics(SectionId.Units), 233 Length_Metric(SectionId.Units, "Length Metric"), 234 Length_Other(SectionId.Units, "Length Other"), 235 Area(SectionId.Units), 236 Volume_Metric(SectionId.Units, "Volume Metric"), 237 Volume_US(SectionId.Units, "Volume US"), 238 Volume_Other(SectionId.Units, "Volume Other"), 239 SpeedAcceleration(SectionId.Units, "Speed and Acceleration"), 240 MassWeight(SectionId.Units, "Mass and Weight"), 241 EnergyPower(SectionId.Units, "Energy and Power"), 242 ElectricalFrequency(SectionId.Units, "Electrical and Frequency"), 243 Weather(SectionId.Units), 244 Digital(SectionId.Units), 245 Coordinates(SectionId.Units), 246 OtherUnitsMetric(SectionId.Units, "Other Units Metric"), 247 OtherUnitsMetricPer(SectionId.Units, "Other Units Metric Per"), 248 OtherUnitsUS(SectionId.Units, "Other Units 
US"), 249 OtherUnits(SectionId.Units, "Other Units"), 250 CompoundUnits(SectionId.Units, "Compound Units"), 251 252 Displaying_Lists(SectionId.Misc, "Displaying Lists"), 253 MinimalPairs(SectionId.Misc, "Minimal Pairs"), 254 PersonNameFormats(SectionId.Misc, "Person Name Formats"), 255 Transforms(SectionId.Misc), 256 257 Identity(SectionId.Special), 258 Version(SectionId.Special), 259 Suppress(SectionId.Special), 260 Deprecated(SectionId.Special), 261 Unknown(SectionId.Special), 262 263 C_NAmerica(SectionId.Currencies, "North America (C)"), 264 // need to add (C) to differentiate from Timezone territories 265 C_SAmerica(SectionId.Currencies, "South America (C)"), 266 C_NWEurope(SectionId.Currencies, "Northern/Western Europe"), 267 C_SEEurope(SectionId.Currencies, "Southern/Eastern Europe"), 268 C_NAfrica(SectionId.Currencies, "Northern Africa"), 269 C_WAfrica(SectionId.Currencies, "Western Africa"), 270 C_MAfrica(SectionId.Currencies, "Middle Africa"), 271 C_EAfrica(SectionId.Currencies, "Eastern Africa"), 272 C_SAfrica(SectionId.Currencies, "Southern Africa"), 273 C_WAsia(SectionId.Currencies, "Western Asia (C)"), 274 C_CAsia(SectionId.Currencies, "Central Asia (C)"), 275 C_EAsia(SectionId.Currencies, "Eastern Asia (C)"), 276 C_SAsia(SectionId.Currencies, "Southern Asia (C)"), 277 C_SEAsia(SectionId.Currencies, "Southeast Asia (C)"), 278 C_Oceania(SectionId.Currencies, "Oceania (C)"), 279 C_Unknown(SectionId.Currencies, "Unknown Region (C)"), 280 281 // BCP47 282 u_Extension(SectionId.BCP47), 283 t_Extension(SectionId.BCP47), 284 285 // Supplemental 286 Alias(SectionId.Supplemental), 287 IdValidity(SectionId.Supplemental), 288 Locale(SectionId.Supplemental), 289 RegionMapping(SectionId.Supplemental), 290 WZoneMapping(SectionId.Supplemental), 291 Transform(SectionId.Supplemental), 292 Units(SectionId.Supplemental), 293 Likely(SectionId.Supplemental), 294 LanguageMatch(SectionId.Supplemental), 295 TerritoryInfo(SectionId.Supplemental), 296 
LanguageInfo(SectionId.Supplemental), 297 LanguageGroup(SectionId.Supplemental), 298 Fallback(SectionId.Supplemental), 299 Gender(SectionId.Supplemental), 300 Grammar(SectionId.Supplemental), 301 Metazone(SectionId.Supplemental), 302 NumberSystem(SectionId.Supplemental), 303 Plural(SectionId.Supplemental), 304 PluralRange(SectionId.Supplemental), 305 Containment(SectionId.Supplemental), 306 Currency(SectionId.Supplemental), 307 Calendar(SectionId.Supplemental), 308 WeekData(SectionId.Supplemental), 309 Measurement(SectionId.Supplemental), 310 Language(SectionId.Supplemental), 311 RBNF(SectionId.Supplemental), 312 Segmentation(SectionId.Supplemental), 313 DayPeriod(SectionId.Supplemental), 314 315 Category(SectionId.Characters), 316 317 // [Smileys, People, Animals & Nature, Food & Drink, Travel & Places, Activities, Objects, 318 // Symbols, Flags] 319 Smileys(SectionId.Characters, "Smileys & Emotion"), 320 People(SectionId.Characters, "People & Body"), 321 People2(SectionId.Characters, "People & Body 2"), 322 Animals_Nature(SectionId.Characters, "Animals & Nature"), 323 Food_Drink(SectionId.Characters, "Food & Drink"), 324 Travel_Places(SectionId.Characters, "Travel & Places"), 325 Travel_Places2(SectionId.Characters, "Travel & Places 2"), 326 Activities(SectionId.Characters), 327 Objects(SectionId.Characters), 328 Objects2(SectionId.Characters), 329 EmojiSymbols(SectionId.Characters, "Emoji Symbols"), 330 Punctuation(SectionId.Characters), 331 MathSymbols(SectionId.Characters, "Math Symbols"), 332 OtherSymbols(SectionId.Characters, "Other Symbols"), 333 Flags(SectionId.Characters), 334 Component(SectionId.Characters), 335 Typography(SectionId.Characters), 336 ; 337 338 private final SectionId sectionId; 339 340 PageId(SectionId sectionId, String... 
alternateNames) { 341 this.sectionId = sectionId; 342 SectionIdToPageIds.put(sectionId, this); 343 PageIdNames.add(this, alternateNames); 344 } 345 346 /** 347 * Construct a pageId given a string 348 * 349 * @param name 350 * @return 351 */ 352 public static PageId forString(String name) { 353 try { 354 return PageIdNames.forString(name); 355 } catch (Exception e) { 356 throw new ICUException("No PageId for " + name, e); 357 } 358 } 359 360 /** 361 * Returns the page id 362 * 363 * @return a page ID, such as 'Languages' 364 */ 365 @Override 366 public String toString() { 367 return PageIdNames.toString(this); 368 } 369 370 /** 371 * Get the containing section id, such as 'Code Lists' 372 * 373 * @return the containing section ID 374 */ 375 public SectionId getSectionId() { 376 return sectionId; 377 } 378 } 379 380 private final SectionId sectionId; 381 private final PageId pageId; 382 private final String header; 383 private final String code; 384 private final String originalPath; 385 private final SurveyToolStatus status; 386 387 // Used for ordering 388 private final int headerOrder; 389 private final long codeOrder; 390 private final SubstringOrder codeSuborder; 391 392 static final Pattern SEMI = PatternCache.get("\\s*;\\s*"); 393 static final Matcher ALT_MATCHER = PatternCache.get("\\[@alt=\"([^\"]*+)\"]").matcher(""); 394 395 static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance(); 396 static final Map<String, String> metazoneToContinent = 397 supplementalDataInfo.getMetazoneToContinentMap(); 398 static final Map<String, String> metazoneToPageTerritory = new HashMap<>(); 399 400 static { 401 Map<String, Map<String, String>> metazoneToRegionToZone = 402 supplementalDataInfo.getMetazoneToRegionToZone(); 403 for (Entry<String, Map<String, String>> metazoneEntry : metazoneToRegionToZone.entrySet()) { 404 String metazone = metazoneEntry.getKey(); 405 String worldZone = metazoneEntry.getValue().get("001"); 406 String territory = 
Containment.getRegionFromZone(worldZone); 407 if (territory == null) { 408 territory = "ZZ"; 409 } 410 // Russia, Antarctica => territory 411 // in Australasia, Asia, S. America => subcontinent 412 // in N. America => N. America (grouping of 3 subcontinents) 413 // in everything else => continent 414 if (territory.equals("RU") || territory.equals("AQ")) { 415 metazoneToPageTerritory.put(metazone, territory); 416 } else { 417 String continent = Containment.getContinent(territory); 418 String subcontinent = Containment.getSubcontinent(territory); 419 if (continent.equals("142")) { // Asia 420 metazoneToPageTerritory.put(metazone, subcontinent); 421 } else if (continent.equals("019")) { // Americas 422 metazoneToPageTerritory.put( 423 metazone, subcontinent.equals("005") ? subcontinent : "003"); 424 } else if (subcontinent.equals("053")) { // Australasia 425 metazoneToPageTerritory.put(metazone, subcontinent); 426 } else { 427 metazoneToPageTerritory.put(metazone, continent); 428 } 429 } 430 } 431 } 432 433 private PathHeader( 434 SectionId sectionId, 435 PageId pageId, 436 String header, 437 int headerOrder, 438 String code, 439 long codeOrder, 440 SubstringOrder suborder, 441 SurveyToolStatus status, 442 String originalPath) { 443 this.sectionId = sectionId; 444 this.pageId = pageId; 445 this.header = header; 446 this.headerOrder = headerOrder; 447 this.code = code; 448 this.codeOrder = codeOrder; 449 this.codeSuborder = suborder; 450 this.originalPath = originalPath; 451 this.status = status; 452 } 453 454 /** 455 * Return a factory for use in creating the headers. This is cached after first use. The calls 456 * are thread-safe. Null gets the default (CLDRConfig) english file. 
457 * 458 * @param englishFile 459 */ 460 public static Factory getFactory(CLDRFile englishFile) { 461 if (factorySingleton == null) { 462 if (englishFile == null) { 463 englishFile = CLDRConfig.getInstance().getEnglish(); 464 } 465 if (!englishFile.getLocaleID().equals(ULocale.ENGLISH.getBaseName())) { 466 throw new IllegalArgumentException( 467 "PathHeader's CLDRFile must be '" 468 + ULocale.ENGLISH.getBaseName() 469 + "', but found '" 470 + englishFile.getLocaleID() 471 + "'"); 472 } 473 factorySingleton = new Factory(englishFile); 474 } 475 return factorySingleton; 476 } 477 478 /** Convenience method for common case. See {{@link #getFactory(CLDRFile)}} */ 479 public static Factory getFactory() { 480 return getFactory(null); 481 } 482 483 /** 484 * @deprecated 485 */ 486 @Deprecated 487 public String getSection() { 488 return sectionId.toString(); 489 } 490 491 public SectionId getSectionId() { 492 return sectionId; 493 } 494 495 /** 496 * @deprecated 497 */ 498 @Deprecated 499 public String getPage() { 500 return pageId.toString(); 501 } 502 503 public PageId getPageId() { 504 return pageId; 505 } 506 507 public String getHeader() { 508 return header == null ? "" : header; 509 } 510 511 public String getCode() { 512 return code; 513 } 514 515 public String getHeaderCode() { 516 return getHeader() + ": " + getCode(); 517 } 518 519 public String getOriginalPath() { 520 return originalPath; 521 } 522 523 public SurveyToolStatus getSurveyToolStatus() { 524 return status; 525 } 526 527 @Override 528 public String toString() { 529 return sectionId 530 + "\t" 531 + pageId 532 + "\t" 533 + header // + "\t" + headerOrder 534 + "\t" 535 + code // + "\t" + codeOrder 536 ; 537 } 538 539 /** 540 * Compare this PathHeader to another one 541 * 542 * @param other the object to be compared. 
543 * @return 0 if equal, -1 if less, 1 if more 544 * <p>Note: if we ever have to compare just the header or just the code, methods to do that 545 * were in release 44 (compareHeader and compareCode), but they were unused and therefore 546 * removed in CLDR-11155. 547 */ 548 @Override 549 public int compareTo(PathHeader other) { 550 // Within each section, order alphabetically if the integer orders are 551 // not different. 552 try { 553 int result; 554 if (0 != (result = sectionId.compareTo(other.sectionId))) { 555 return result; 556 } 557 if (0 != (result = pageId.compareTo(other.pageId))) { 558 return result; 559 } 560 if (0 != (result = headerOrder - other.headerOrder)) { 561 return result; 562 } 563 if (0 != (result = alphabeticCompare(header, other.header))) { 564 return result; 565 } 566 long longResult; 567 if (0 != (longResult = codeOrder - other.codeOrder)) { 568 return longResult < 0 ? -1 : 1; 569 } 570 if (codeSuborder != null) { // do all three cases, for transitivity 571 if (other.codeSuborder != null) { 572 if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) { 573 return result; 574 } 575 } else { 576 return 1; // if codeSuborder != null (and other.codeSuborder 577 // == null), it is greater 578 } 579 } else if (other.codeSuborder != null) { 580 return -1; // if codeSuborder == null (and other.codeSuborder != 581 // null), it is greater 582 } 583 if (0 != (result = alphabeticCompare(code, other.code))) { 584 return result; 585 } 586 if (!SKIP_ORIGINAL_PATH) { 587 if (0 != (result = alphabeticCompare(originalPath, other.originalPath))) { 588 return result; 589 } 590 } 591 return 0; 592 } catch (RuntimeException e) { 593 throw new IllegalArgumentException( 594 "Internal problem comparing " + this + " and " + other, e); 595 } 596 } 597 598 @Override 599 public boolean equals(Object obj) { 600 PathHeader other; 601 try { 602 other = (PathHeader) obj; 603 } catch (Exception e) { 604 return false; 605 } 606 return sectionId == other.sectionId 
607 && pageId == other.pageId 608 && header.equals(other.header) 609 && code.equals(other.code); 610 } 611 612 @Override 613 public int hashCode() { 614 return sectionId.hashCode() ^ pageId.hashCode() ^ header.hashCode() ^ code.hashCode(); 615 } 616 617 public static class Factory implements Transform<String, PathHeader> { 618 static final RegexLookup<RawData> lookup = 619 RegexLookup.of(new PathHeaderTransform()) 620 .setPatternTransform(RegexLookup.RegexFinderTransformPath) 621 .loadFromFile(PathHeader.class, "data/PathHeader.txt"); 622 // synchronized with lookup 623 static final Output<String[]> args = new Output<>(); 624 // synchronized with lookup 625 static final Counter<RawData> counter = new Counter<>(); 626 // synchronized with lookup 627 static final Map<RawData, String> samples = new HashMap<>(); 628 // synchronized with lookup 629 static long order; 630 static SubstringOrder suborder; 631 632 static final Map<String, PathHeader> cache = new HashMap<>(); 633 // synchronized with cache 634 static final Map<SectionId, Map<PageId, SectionPage>> sectionToPageToSectionPage = 635 new EnumMap<>(SectionId.class); 636 static final Relation<SectionPage, String> sectionPageToPaths = 637 Relation.of(new TreeMap<>(), HashSet.class); 638 private static CLDRFile englishFile; 639 private final Set<String> matchersFound = new HashSet<>(); 640 641 /** 642 * Create a factory for creating PathHeaders. 643 * 644 * @param englishFile - only sets the file (statically!) if not already set. 645 */ 646 private Factory(CLDRFile englishFile) { 647 setEnglishCLDRFileIfNotSet(englishFile); // temporary 648 } 649 650 /** 651 * Set englishFile if it is not already set. 
652 * 653 * @param englishFile2 the value to set for englishFile 654 */ 655 private static void setEnglishCLDRFileIfNotSet(CLDRFile englishFile2) { 656 synchronized (Factory.class) { 657 if (englishFile == null) { 658 englishFile = englishFile2; 659 } 660 } 661 } 662 663 /** Use only when trying to find unmatched patterns */ 664 public void clearCache() { 665 synchronized (cache) { 666 cache.clear(); 667 } 668 } 669 670 /** Return the PathHeader for a given path. Thread-safe. */ 671 public PathHeader fromPath(String path) { 672 return fromPath(path, null); 673 } 674 675 /** Return the PathHeader for a given path. Thread-safe. */ 676 @Override 677 public PathHeader transform(String path) { 678 return fromPath(path, null); 679 } 680 681 /** 682 * Return the PathHeader for a given path. Thread-safe. 683 * 684 * @param failures a list of failures to add to. 685 */ 686 public PathHeader fromPath(final String path, List<String> failures) { 687 if (path == null) { 688 throw new NullPointerException("Path cannot be null"); 689 } 690 synchronized (cache) { 691 PathHeader old = cache.get(path); 692 if (old != null) { 693 return old; 694 } 695 } 696 synchronized (lookup) { 697 String cleanPath = path; 698 // special handling for alt 699 String alt = null; 700 int altPos = cleanPath.indexOf("[@alt="); 701 if (altPos >= 0 && !cleanPath.endsWith("/symbol[@alt=\"narrow\"]")) { 702 if (ALT_MATCHER.reset(cleanPath).find()) { 703 alt = ALT_MATCHER.group(1); 704 cleanPath = 705 cleanPath.substring(0, ALT_MATCHER.start()) 706 + cleanPath.substring(ALT_MATCHER.end()); 707 int pos = alt.indexOf("proposed"); 708 if (pos >= 0 && !path.startsWith("//ldml/collations")) { 709 alt = pos == 0 ? null : alt.substring(0, pos - 1); 710 // drop "proposed", 711 // change "xxx-proposed" to xxx. 
712 } 713 } else { 714 throw new IllegalArgumentException(); 715 } 716 } 717 Output<Finder> matcherFound = new Output<>(); 718 RawData data = lookup.get(cleanPath, null, args, matcherFound, failures); 719 if (data == null) { 720 return null; 721 } 722 matchersFound.add(matcherFound.value.toString()); 723 counter.add(data, 1); 724 if (!samples.containsKey(data)) { 725 samples.put(data, cleanPath); 726 } 727 try { 728 PathHeader result = makePathHeader(data, path, alt); 729 synchronized (cache) { 730 PathHeader old = cache.get(path); 731 if (old == null) { 732 cache.put(path, result); 733 } else { 734 result = old; 735 } 736 Map<PageId, SectionPage> pageToPathHeaders = 737 sectionToPageToSectionPage.get(result.sectionId); 738 if (pageToPathHeaders == null) { 739 sectionToPageToSectionPage.put( 740 result.sectionId, 741 pageToPathHeaders = new EnumMap<>(PageId.class)); 742 } 743 SectionPage sectionPage = pageToPathHeaders.get(result.pageId); 744 if (sectionPage == null) { 745 sectionPage = new SectionPage(result.sectionId, result.pageId); 746 pageToPathHeaders.put(result.pageId, sectionPage); 747 } 748 sectionPageToPaths.put(sectionPage, path); 749 } 750 return result; 751 } catch (Exception e) { 752 throw new IllegalArgumentException( 753 "Probably mismatch in Page/Section enum, or too few capturing groups in regex for " 754 + path, 755 e); 756 } 757 } 758 } 759 makePathHeader(RawData data, String path, String alt)760 private PathHeader makePathHeader(RawData data, String path, String alt) { 761 // Caution: each call to PathHeader.Factory.fix changes the value of 762 // PathHeader.Factory.order 763 SectionId newSectionId = SectionId.forString(fix(data.section, 0)); 764 PageId newPageId = PageId.forString(fix(data.page, 0)); 765 String newHeader = fix(data.header, data.headerOrder); 766 int newHeaderOrder = (int) order; 767 String codeDashAlt = data.code + (alt == null ? 
"" : ("-" + alt)); 768 String newCode = fix(codeDashAlt, data.codeOrder); 769 long newCodeOrder = order; 770 return new PathHeader( 771 newSectionId, 772 newPageId, 773 newHeader, 774 newHeaderOrder, 775 newCode, 776 newCodeOrder, 777 suborder, 778 data.status, 779 path); 780 } 781 782 private static class SectionPage implements Comparable<SectionPage> { 783 private final SectionId sectionId; 784 private final PageId pageId; 785 SectionPage(SectionId sectionId, PageId pageId)786 public SectionPage(SectionId sectionId, PageId pageId) { 787 this.sectionId = sectionId; 788 this.pageId = pageId; 789 } 790 791 @Override compareTo(SectionPage other)792 public int compareTo(SectionPage other) { 793 // Within each section, order alphabetically if the integer 794 // orders are 795 // not different. 796 int result; 797 if (0 != (result = sectionId.compareTo(other.sectionId))) { 798 return result; 799 } 800 if (0 != (result = pageId.compareTo(other.pageId))) { 801 return result; 802 } 803 return 0; 804 } 805 806 @Override equals(Object obj)807 public boolean equals(Object obj) { 808 PathHeader other; 809 try { 810 other = (PathHeader) obj; 811 } catch (Exception e) { 812 return false; 813 } 814 return sectionId == other.sectionId && pageId == other.pageId; 815 } 816 817 @Override hashCode()818 public int hashCode() { 819 return sectionId.hashCode() ^ pageId.hashCode(); 820 } 821 822 @Override toString()823 public String toString() { 824 return sectionId + " > " + pageId; 825 } 826 } 827 828 /** 829 * Returns a set of paths currently associated with the given section and page. 830 * 831 * <p><b>Warning:</b> 832 * 833 * <ol> 834 * <li>The set may not be complete for a cldrFile unless all of paths in the file have had 835 * fromPath called. And this includes getExtraPaths(). 836 * <li>The set may include paths that have no value in the current cldrFile. 837 * <li>The set may be empty, if the section/page aren't valid. 838 * </ol> 839 * 840 * Thread-safe. 
841 */ getCachedPaths(SectionId sectionId, PageId page)842 public static Set<String> getCachedPaths(SectionId sectionId, PageId page) { 843 Set<String> target = new HashSet<>(); 844 synchronized (cache) { 845 Map<PageId, SectionPage> pageToSectionPage = 846 sectionToPageToSectionPage.get(sectionId); 847 if (pageToSectionPage == null) { 848 return target; 849 } 850 SectionPage sectionPage = pageToSectionPage.get(page); 851 if (sectionPage == null) { 852 return target; 853 } 854 Set<String> set = sectionPageToPaths.getAll(sectionPage); 855 target.addAll(set); 856 } 857 return target; 858 } 859 860 /** 861 * Return the Sections and Pages that are in defined, for display in menus. Both are 862 * ordered. 863 */ getSectionIdsToPageIds()864 public static Relation<SectionId, PageId> getSectionIdsToPageIds() { 865 SectionIdToPageIds.freeze(); // just in case 866 return SectionIdToPageIds; 867 } 868 filterCldr(SectionId section, PageId page, CLDRFile file)869 public Iterable<String> filterCldr(SectionId section, PageId page, CLDRFile file) { 870 return new FilteredIterable(section, page, file); 871 } 872 873 private class FilteredIterable implements Iterable<String>, SimpleIterator<String> { 874 private final SectionId sectionId; 875 private final PageId pageId; 876 private final Iterator<String> fileIterator; 877 FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file)878 FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file) { 879 this.sectionId = sectionId; 880 this.pageId = pageId; 881 this.fileIterator = file.fullIterable().iterator(); 882 } 883 FilteredIterable(String section, String page, CLDRFile file)884 public FilteredIterable(String section, String page, CLDRFile file) { 885 this(SectionId.forString(section), PageId.forString(page), file); 886 } 887 888 @Override iterator()889 public Iterator<String> iterator() { 890 return With.toIterator(this); 891 } 892 893 @Override next()894 public String next() { 895 while (fileIterator.hasNext()) { 
896 String path = fileIterator.next(); 897 PathHeader pathHeader = fromPath(path); 898 if (sectionId == pathHeader.sectionId && pageId == pathHeader.pageId) { 899 return path; 900 } 901 } 902 return null; 903 } 904 } 905 906 private static class ChronologicalOrder { 907 private final Map<String, Integer> map = new HashMap<>(); 908 private String item; 909 private int order; 910 private final ChronologicalOrder toClear; 911 ChronologicalOrder(ChronologicalOrder toClear)912 ChronologicalOrder(ChronologicalOrder toClear) { 913 this.toClear = toClear; 914 } 915 getOrder()916 int getOrder() { 917 return order; 918 } 919 set(String itemToOrder)920 public String set(String itemToOrder) { 921 if (itemToOrder.startsWith("*")) { 922 item = itemToOrder.substring(1, itemToOrder.length()); 923 return item; // keep old order 924 } 925 item = itemToOrder; 926 Integer old = map.get(item); 927 if (old != null) { 928 order = old.intValue(); 929 } else { 930 order = map.size(); 931 map.put(item, order); 932 clearLower(); 933 } 934 return item; 935 } 936 clearLower()937 private void clearLower() { 938 if (toClear != null) { 939 toClear.map.clear(); 940 toClear.order = 0; 941 toClear.clearLower(); 942 } 943 } 944 } 945 946 static class RawData { 947 static ChronologicalOrder codeOrdering = new ChronologicalOrder(null); 948 static ChronologicalOrder headerOrdering = new ChronologicalOrder(codeOrdering); 949 RawData(String source)950 public RawData(String source) { 951 String[] split = SEMI.split(source); 952 section = split[0]; 953 // HACK 954 if (section.equals("Timezones") && split[1].equals("Indian")) { 955 page = "Indian2"; 956 } else { 957 page = split[1]; 958 } 959 960 header = headerOrdering.set(split[2]); 961 headerOrder = headerOrdering.getOrder(); 962 963 code = codeOrdering.set(split[3]); 964 codeOrder = codeOrdering.getOrder(); 965 966 status = 967 split.length < 5 968 ? 
SurveyToolStatus.READ_WRITE 969 : SurveyToolStatus.valueOf(split[4]); 970 } 971 972 public final String section; 973 public final String page; 974 public final String header; 975 public final int headerOrder; 976 public final String code; 977 public final int codeOrder; 978 public final SurveyToolStatus status; 979 980 @Override 981 public String toString() { 982 return section 983 + "\t" 984 + page 985 + "\t" 986 + header 987 + "\t" 988 + headerOrder 989 + "\t" 990 + code 991 + "\t" 992 + codeOrder 993 + "\t" 994 + status; 995 } 996 } 997 998 static class PathHeaderTransform implements Transform<String, RawData> { 999 @Override 1000 public RawData transform(String source) { 1001 return new RawData(source); 1002 } 1003 } 1004 1005 /** 1006 * Internal data, for testing and debugging. 1007 * 1008 * @deprecated 1009 */ 1010 @Deprecated 1011 public class CounterData extends Row.R4<String, RawData, String, String> { 1012 public CounterData(String a, RawData b, String c) { 1013 super( 1014 a, 1015 b, 1016 c == null ? "no sample" : c, 1017 c == null ? "no sample" : fromPath(c).toString()); 1018 } 1019 } 1020 1021 /** 1022 * Get the internal data, for testing and debugging. 
1023 * 1024 * @deprecated 1025 */ 1026 @Deprecated 1027 public Counter<CounterData> getInternalCounter() { 1028 synchronized (lookup) { 1029 Counter<CounterData> result = new Counter<>(); 1030 for (Map.Entry<Finder, RawData> foo : lookup) { 1031 Finder finder = foo.getKey(); 1032 RawData data = foo.getValue(); 1033 long count = counter.get(data); 1034 result.add(new CounterData(finder.toString(), data, samples.get(data)), count); 1035 } 1036 return result; 1037 } 1038 } 1039 1040 static Map<String, Transform<String, String>> functionMap = new HashMap<>(); 1041 static String[] months = { 1042 "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", 1043 "Und" 1044 }; 1045 static List<String> days = Arrays.asList("sun", "mon", "tue", "wed", "thu", "fri", "sat"); 1046 static List<String> unitOrder = DtdData.getUnitOrder().getOrder(); 1047 static final MapComparator<String> dayPeriods = 1048 new MapComparator<String>() 1049 .add( 1050 "am", 1051 "pm", 1052 "midnight", 1053 "noon", 1054 "morning1", 1055 "morning2", 1056 "afternoon1", 1057 "afternoon2", 1058 "evening1", 1059 "evening2", 1060 "night1", 1061 "night2") 1062 .freeze(); 1063 static LikelySubtags likelySubtags = new LikelySubtags(); 1064 static HyphenSplitter hyphenSplitter = new HyphenSplitter(); 1065 static Transform<String, String> catFromTerritory; 1066 static Transform<String, String> catFromTimezone; 1067 1068 static { 1069 // Put any new functions used in PathHeader.txt in here. 1070 // To change the order of items within a section or heading, set 1071 // order/suborder to be the relative position of the current item. 
1072 functionMap.put( 1073 "month", 1074 new Transform<>() { 1075 @Override 1076 public String transform(String source) { 1077 int m = Integer.parseInt(source); 1078 order = m; 1079 return months[m - 1]; 1080 } 1081 }); 1082 functionMap.put( 1083 "count", 1084 new Transform<>() { 1085 @Override 1086 public String transform(String source) { 1087 suborder = new SubstringOrder(source); 1088 return source; 1089 } 1090 }); 1091 functionMap.put( 1092 "count2", 1093 new Transform<>() { 1094 @Override 1095 public String transform(String source) { 1096 int pos = source.indexOf('-'); 1097 source = pos + source.substring(pos); 1098 suborder = new SubstringOrder(source); // make 10000-... 1099 // into 5- 1100 return source; 1101 } 1102 }); 1103 functionMap.put( 1104 "currencySymbol", 1105 new Transform<>() { 1106 @Override 1107 public String transform(String source) { 1108 order = 901; 1109 if (source.endsWith("narrow")) { 1110 order = 902; 1111 } 1112 if (source.endsWith("variant")) { 1113 order = 903; 1114 } 1115 return source; 1116 } 1117 }); 1118 // &unitCount($1-$3-$4), where $1 is length, $2 is count, $3 is case (optional) 1119 // but also 1120 // &unitCount($1-$3-$5-$4), where $5 is case, $4 is gender — notice order change 1121 functionMap.put( 1122 "unitCount", 1123 new Transform<>() { 1124 @Override 1125 public String transform(String source) { 1126 List<String> parts = HYPHEN_SPLITTER.splitToList(source); 1127 if (parts.size() == 1) { 1128 return source; 1129 } 1130 int lengthNumber = Width.getValue(parts.get(0)).ordinal(); 1131 int type = 0; 1132 int rest = 0; 1133 switch (parts.get(1)) { 1134 case "gender": 1135 type = 0; 1136 break; 1137 case "displayName": 1138 type = 1; 1139 break; 1140 case "per": 1141 type = 2; 1142 break; 1143 default: 1144 type = 3; 1145 int countNumber = 1146 (parts.size() > 1 1147 ? Count.valueOf(parts.get(1)) 1148 : Count.other) 1149 .ordinal(); 1150 int caseNumber = 1151 (parts.size() > 2 1152 ? 
GrammarInfo.CaseValues.valueOf( 1153 parts.get(2)) 1154 : GrammarInfo.CaseValues.nominative) 1155 .ordinal(); 1156 int genderNumber = GrammarInfo.GenderValues.neuter.ordinal(); 1157 if (parts.size() > 3) { 1158 String genderPart = parts.get(3); 1159 if (!genderPart.equals("dgender")) { 1160 genderNumber = 1161 GrammarInfo.GenderValues.valueOf(genderPart) 1162 .ordinal(); 1163 } 1164 type = 4; 1165 } 1166 rest = (countNumber << 16) | (caseNumber << 8) | genderNumber; 1167 break; 1168 } 1169 order = (type << 28) | (lengthNumber << 24) | rest; 1170 return source; 1171 } 1172 }); 1173 1174 functionMap.put( 1175 "pluralNumber", 1176 new Transform<>() { 1177 @Override 1178 public String transform(String source) { 1179 order = GrammarInfo.PluralValues.valueOf(source).ordinal(); 1180 return source; 1181 } 1182 }); 1183 1184 functionMap.put( 1185 "caseNumber", 1186 new Transform<>() { 1187 @Override 1188 public String transform(String source) { 1189 order = GrammarInfo.CaseValues.valueOf(source).ordinal(); 1190 return source; 1191 } 1192 }); 1193 1194 functionMap.put( 1195 "genderNumber", 1196 new Transform<>() { 1197 @Override 1198 public String transform(String source) { 1199 order = GrammarInfo.GenderValues.valueOf(source).ordinal(); 1200 return source; 1201 } 1202 }); 1203 1204 functionMap.put( 1205 "day", 1206 new Transform<>() { 1207 @Override 1208 public String transform(String source) { 1209 int m = days.indexOf(source); 1210 order = m; 1211 return source; 1212 } 1213 }); 1214 functionMap.put( 1215 "dayPeriod", 1216 new Transform<>() { 1217 @Override 1218 public String transform(String source) { 1219 try { 1220 order = dayPeriods.getNumericOrder(source); 1221 } catch (Exception e) { 1222 // if an old item is tried, like "evening", this will fail. 1223 // so that old data still works, hack this. 
1224 order = Math.abs(source.hashCode() << 16); 1225 } 1226 return source; 1227 } 1228 }); 1229 functionMap.put( 1230 "calendar", 1231 new Transform<>() { 1232 final Map<String, String> fixNames = 1233 Builder.with(new HashMap<String, String>()) 1234 .put("islamicc", "Islamic Civil") 1235 .put("roc", "Minguo") 1236 .put("Ethioaa", "Ethiopic Amete Alem") 1237 .put("Gregory", "Gregorian") 1238 .put("iso8601", "ISO 8601") 1239 .freeze(); 1240 1241 @Override 1242 public String transform(String source) { 1243 String result = fixNames.get(source); 1244 return result != null ? result : UCharacter.toTitleCase(source, null); 1245 } 1246 }); 1247 1248 functionMap.put( 1249 "calField", 1250 new Transform<>() { 1251 @Override 1252 public String transform(String source) { 1253 String[] fields = source.split(":", 3); 1254 order = 0; 1255 final List<String> widthValues = 1256 Arrays.asList("wide", "abbreviated", "short", "narrow"); 1257 final List<String> calendarFieldValues = 1258 Arrays.asList( 1259 "Eras", 1260 "Quarters", 1261 "Months", 1262 "Days", 1263 "DayPeriods", 1264 "Formats"); 1265 final List<String> calendarFormatTypes = 1266 Arrays.asList("Standard", "Flexible", "Intervals"); 1267 final List<String> calendarContextTypes = 1268 Arrays.asList("none", "format", "stand-alone"); 1269 final List<String> calendarFormatSubtypes = 1270 Arrays.asList( 1271 "date", 1272 "time", 1273 "time12", 1274 "time24", 1275 "dateTime", 1276 "fallback"); 1277 1278 Map<String, String> fixNames = 1279 Builder.with(new HashMap<String, String>()) 1280 .put("DayPeriods", "Day Periods") 1281 .put("format", "Formatting") 1282 .put("stand-alone", "Standalone") 1283 .put("none", "") 1284 .put("date", "Date Formats") 1285 .put("time", "Time Formats") 1286 .put("time12", "12 Hour Time Formats") 1287 .put("time24", "24 Hour Time Formats") 1288 .put("dateTime", "Date & Time Combination Formats") 1289 .freeze(); 1290 1291 if (calendarFieldValues.contains(fields[0])) { 1292 order = 
calendarFieldValues.indexOf(fields[0]) * 100; 1293 } else { 1294 order = calendarFieldValues.size() * 100; 1295 } 1296 1297 if (fields[0].equals("Formats")) { 1298 if (calendarFormatTypes.contains(fields[1])) { 1299 order += calendarFormatTypes.indexOf(fields[1]) * 10; 1300 } else { 1301 order += calendarFormatTypes.size() * 10; 1302 } 1303 if (calendarFormatSubtypes.contains(fields[2])) { 1304 order += calendarFormatSubtypes.indexOf(fields[2]); 1305 } else { 1306 order += calendarFormatSubtypes.size(); 1307 } 1308 } else { 1309 if (widthValues.contains(fields[1])) { 1310 order += widthValues.indexOf(fields[1]) * 10; 1311 } else { 1312 order += widthValues.size() * 10; 1313 } 1314 if (calendarContextTypes.contains(fields[2])) { 1315 order += calendarContextTypes.indexOf(fields[2]); 1316 } else { 1317 order += calendarContextTypes.size(); 1318 } 1319 } 1320 1321 String[] fixedFields = new String[fields.length]; 1322 for (int i = 0; i < fields.length; i++) { 1323 String s = fixNames.get(fields[i]); 1324 fixedFields[i] = s != null ? s : fields[i]; 1325 } 1326 1327 return fixedFields[0] 1328 + " - " 1329 + fixedFields[1] 1330 + (fixedFields[2].length() > 0 ? 
" - " + fixedFields[2] : ""); 1331 } 1332 }); 1333 1334 functionMap.put( 1335 "titlecase", 1336 new Transform<>() { 1337 @Override 1338 public String transform(String source) { 1339 return UCharacter.toTitleCase(source, null); 1340 } 1341 }); 1342 functionMap.put( 1343 "categoryFromScript", 1344 new Transform<>() { 1345 @Override 1346 public String transform(String source) { 1347 String script = hyphenSplitter.split(source); 1348 Info info = ScriptMetadata.getInfo(script); 1349 if (info == null) { 1350 info = ScriptMetadata.getInfo("Zzzz"); 1351 } 1352 order = 100 - info.idUsage.ordinal(); 1353 return info.idUsage.name; 1354 } 1355 }); 1356 functionMap.put( 1357 "categoryFromKey", 1358 new Transform<>() { 1359 final Map<String, String> fixNames = 1360 Builder.with(new HashMap<String, String>()) 1361 .put("cf", "Currency Format") 1362 .put("em", "Emoji Presentation") 1363 .put("fw", "First Day of Week") 1364 .put("lb", "Line Break") 1365 .put("hc", "Hour Cycle") 1366 .put("ms", "Measurement System") 1367 .put("ss", "Sentence Break Suppressions") 1368 .freeze(); 1369 1370 @Override 1371 public String transform(String source) { 1372 String fixedName = fixNames.get(source); 1373 return fixedName != null ? 
// &scriptFromLanguage(x): takes the part of x before the first '-' as the language and
// returns "Languages in <script name> Script" for its likely script. Hans and Hant are
// both reported as "Han Script".
functionMap.put(
        "scriptFromLanguage",
        new Transform<>() {
            @Override
            public String transform(String source0) {
                final String language = hyphenSplitter.split(source0);
                String script = likelySubtags.getLikelyScript(language);
                if (script == null) {
                    // Repeating the same lookup cannot produce a different answer, so fall
                    // back to the unknown-script code (as categoryFromScript does) instead
                    // of failing below with a NullPointerException.
                    script = "Zzzz";
                }
                final String scriptName = englishFile.getName(CLDRFile.SCRIPT_NAME, script);
                final String label;
                if (script.equals("Hans") || script.equals("Hant")) {
                    label = "Han Script";
                } else if (scriptName == null) {
                    // No English name available: show the raw script code.
                    label = script + " Script";
                } else if (scriptName.endsWith(" Script")) {
                    label = scriptName;
                } else {
                    label = scriptName + " Script";
                }
                return "Languages in " + label;
            }
        });
"005" 1483 : "003"; // was Integer.valueOf(subcontinent) == 1484 // 5 1485 return "Territories (" 1486 + englishFile.getName( 1487 CLDRFile.TERRITORY_NAME, theSubContinent) 1488 + ")"; 1489 case "001": 1490 case "ZZ": 1491 return "Geographic Regions"; // not in containment 1492 default: 1493 return "Territories (" 1494 + englishFile.getName( 1495 CLDRFile.TERRITORY_NAME, theContinent) 1496 + ")"; 1497 } 1498 } 1499 }); 1500 functionMap.put( 1501 "categoryFromTimezone", 1502 catFromTimezone = 1503 new Transform<>() { 1504 @Override 1505 public String transform(String source0) { 1506 String territory = Containment.getRegionFromZone(source0); 1507 if (territory == null) { 1508 territory = "ZZ"; 1509 } 1510 return catFromTerritory.transform(territory); 1511 } 1512 }); 1513 functionMap.put( 1514 "timeZonePage", 1515 new Transform<>() { 1516 Set<String> singlePageTerritories = 1517 new HashSet<>(Arrays.asList("AQ", "RU", "ZZ")); 1518 1519 @Override 1520 public String transform(String source0) { 1521 String theTerritory = Containment.getRegionFromZone(source0); 1522 if (theTerritory == null 1523 || "001".equals(theTerritory) 1524 || "ZZ".equals(theTerritory)) { 1525 if ("Etc/Unknown".equals(source0)) { 1526 theTerritory = "ZZ"; 1527 } else { 1528 throw new IllegalArgumentException( 1529 "ICU needs zone update? Source: " 1530 + source0 1531 + "; Territory: " 1532 + theTerritory); 1533 } 1534 } 1535 if (singlePageTerritories.contains(theTerritory)) { 1536 return englishFile.getName(CLDRFile.TERRITORY_NAME, theTerritory); 1537 } 1538 String theContinent = Containment.getContinent(theTerritory); 1539 final String subcontinent = Containment.getSubcontinent(theTerritory); 1540 String theSubContinent; 1541 switch (Integer.parseInt(theContinent)) { 1542 case 9: // Oceania - For the timeZonePage, we group Australasia on 1543 // one page, and the rest of Oceania on the other. 1544 try { 1545 theSubContinent = 1546 subcontinent.equals("053") ? 
"053" : "009"; // was 1547 // Integer.valueOf(subcontinent) == 1548 // 53 1549 } catch (NumberFormatException ex) { 1550 theSubContinent = "009"; 1551 } 1552 return englishFile.getName( 1553 CLDRFile.TERRITORY_NAME, theSubContinent); 1554 case 19: // Americas - For the timeZonePage, we just group North 1555 // America & South America 1556 theSubContinent = 1557 Integer.parseInt(subcontinent) == 5 ? "005" : "003"; 1558 return englishFile.getName( 1559 CLDRFile.TERRITORY_NAME, theSubContinent); 1560 case 142: // Asia 1561 return englishFile.getName( 1562 CLDRFile.TERRITORY_NAME, subcontinent); 1563 default: 1564 return englishFile.getName( 1565 CLDRFile.TERRITORY_NAME, theContinent); 1566 } 1567 } 1568 }); 1569 1570 functionMap.put( 1571 "timezoneSorting", 1572 new Transform<>() { 1573 @Override 1574 public String transform(String source) { 1575 final List<String> codeValues = 1576 Arrays.asList( 1577 "generic-long", 1578 "generic-short", 1579 "standard-long", 1580 "standard-short", 1581 "daylight-long", 1582 "daylight-short"); 1583 if (codeValues.contains(source)) { 1584 order = codeValues.indexOf(source); 1585 } else { 1586 order = codeValues.size(); 1587 } 1588 return source; 1589 } 1590 }); 1591 1592 functionMap.put( 1593 "tzdpField", 1594 new Transform<>() { 1595 @Override 1596 public String transform(String source) { 1597 Map<String, String> fieldNames = 1598 Builder.with(new HashMap<String, String>()) 1599 .put("regionFormat", "Region Format - Generic") 1600 .put( 1601 "regionFormat-standard", 1602 "Region Format - Standard") 1603 .put( 1604 "regionFormat-daylight", 1605 "Region Format - Daylight") 1606 .put("gmtFormat", "GMT Format") 1607 .put("hourFormat", "GMT Hours/Minutes Format") 1608 .put("gmtZeroFormat", "GMT Zero Format") 1609 .put("fallbackFormat", "Location Fallback Format") 1610 .freeze(); 1611 final List<String> fieldOrder = 1612 Arrays.asList( 1613 "regionFormat", 1614 "regionFormat-standard", 1615 "regionFormat-daylight", 1616 "gmtFormat", 1617 
"hourFormat", 1618 "gmtZeroFormat", 1619 "fallbackFormat"); 1620 1621 if (fieldOrder.contains(source)) { 1622 order = fieldOrder.indexOf(source); 1623 } else { 1624 order = fieldOrder.size(); 1625 } 1626 1627 String result = fieldNames.get(source); 1628 return result == null ? source : result; 1629 } 1630 }); 1631 functionMap.put( 1632 "unit", 1633 new Transform<>() { 1634 @Override 1635 public String transform(String source) { 1636 int m = unitOrder.indexOf(source); 1637 order = m; 1638 return source.substring(source.indexOf('-') + 1); 1639 } 1640 }); 1641 1642 functionMap.put( 1643 "numericSort", 1644 new Transform<>() { 1645 // Probably only works well for small values, like -5 through +4. 1646 @Override 1647 public String transform(String source) { 1648 Integer pos = Integer.parseInt(source) + 5; 1649 suborder = new SubstringOrder(pos.toString()); 1650 return source; 1651 } 1652 }); 1653 1654 functionMap.put( 1655 "metazone", 1656 new Transform<>() { 1657 1658 @Override 1659 public String transform(String source) { 1660 if (PathHeader.UNIFORM_CONTINENTS) { 1661 String container = getMetazonePageTerritory(source); 1662 order = Containment.getOrder(container); 1663 return englishFile.getName(CLDRFile.TERRITORY_NAME, container); 1664 } else { 1665 String continent = metazoneToContinent.get(source); 1666 if (continent == null) { 1667 continent = "UnknownT"; 1668 } 1669 return continent; 1670 } 1671 } 1672 }); 1673 1674 Object[][] ctto = { 1675 {"BUK", "MM"}, 1676 {"CSD", "RS"}, 1677 {"CSK", "CZ"}, 1678 {"DDM", "DE"}, 1679 {"EUR", "ZZ"}, 1680 {"RHD", "ZW"}, 1681 {"SUR", "RU"}, 1682 {"TPE", "TL"}, 1683 {"XAG", "ZZ"}, 1684 {"XAU", "ZZ"}, 1685 {"XBA", "ZZ"}, 1686 {"XBB", "ZZ"}, 1687 {"XBC", "ZZ"}, 1688 {"XBD", "ZZ"}, 1689 {"XDR", "ZZ"}, 1690 {"XEU", "ZZ"}, 1691 {"XFO", "ZZ"}, 1692 {"XFU", "ZZ"}, 1693 {"XPD", "ZZ"}, 1694 {"XPT", "ZZ"}, 1695 {"XRE", "ZZ"}, 1696 {"XSU", "ZZ"}, 1697 {"XTS", "ZZ"}, 1698 {"XUA", "ZZ"}, 1699 {"XXX", "ZZ"}, 1700 {"YDD", "YE"}, 1701 {"YUD", 
"RS"}, 1702 {"YUM", "RS"}, 1703 {"YUN", "RS"}, 1704 {"YUR", "RS"}, 1705 {"ZRN", "CD"}, 1706 {"ZRZ", "CD"}, 1707 }; 1708 1709 Object[][] sctc = { 1710 {"Northern America", "North America (C)"}, 1711 {"Central America", "North America (C)"}, 1712 {"Caribbean", "North America (C)"}, 1713 {"South America", "South America (C)"}, 1714 {"Northern Africa", "Northern Africa"}, 1715 {"Western Africa", "Western Africa"}, 1716 {"Middle Africa", "Middle Africa"}, 1717 {"Eastern Africa", "Eastern Africa"}, 1718 {"Southern Africa", "Southern Africa"}, 1719 {"Europe", "Northern/Western Europe"}, 1720 {"Northern Europe", "Northern/Western Europe"}, 1721 {"Western Europe", "Northern/Western Europe"}, 1722 {"Eastern Europe", "Southern/Eastern Europe"}, 1723 {"Southern Europe", "Southern/Eastern Europe"}, 1724 {"Western Asia", "Western Asia (C)"}, 1725 {"Central Asia", "Central Asia (C)"}, 1726 {"Eastern Asia", "Eastern Asia (C)"}, 1727 {"Southern Asia", "Southern Asia (C)"}, 1728 {"Southeast Asia", "Southeast Asia (C)"}, 1729 {"Australasia", "Oceania (C)"}, 1730 {"Melanesia", "Oceania (C)"}, 1731 {"Micronesian Region", "Oceania (C)"}, // HACK 1732 {"Polynesia", "Oceania (C)"}, 1733 {"Unknown Region", "Unknown Region (C)"}, 1734 }; 1735 1736 final Map<String, String> currencyToTerritoryOverrides = CldrUtility.asMap(ctto); 1737 final Map<String, String> subContinentToContinent = CldrUtility.asMap(sctc); 1738 final Set<String> fundCurrencies = 1739 new HashSet<>( 1740 Arrays.asList( 1741 "CHE", "CHW", "CLF", "COU", "ECV", "MXV", "USN", "USS", "UYI", 1742 "XEU", "ZAL")); 1743 final Set<String> offshoreCurrencies = new HashSet<>(Arrays.asList("CNH")); 1744 // TODO: Put this into supplementalDataInfo ? 
1745 1746 functionMap.put( 1747 "categoryFromCurrency", 1748 new Transform<>() { 1749 @Override 1750 public String transform(String source0) { 1751 String tenderOrNot = ""; 1752 String territory = 1753 likelySubtags.getLikelyTerritoryFromCurrency(source0); 1754 if (territory == null) { 1755 String tag; 1756 if (fundCurrencies.contains(source0)) { 1757 tag = " (fund)"; 1758 } else if (offshoreCurrencies.contains(source0)) { 1759 tag = " (offshore)"; 1760 } else { 1761 tag = " (old)"; 1762 } 1763 tenderOrNot = ": " + source0 + tag; 1764 } 1765 if (currencyToTerritoryOverrides.keySet().contains(source0)) { 1766 territory = currencyToTerritoryOverrides.get(source0); 1767 } else if (territory == null) { 1768 territory = source0.substring(0, 2); 1769 } 1770 1771 if (territory.equals("ZZ")) { 1772 order = 999; 1773 return englishFile.getName(CLDRFile.TERRITORY_NAME, territory) 1774 + ": " 1775 + source0; 1776 } else { 1777 return catFromTerritory.transform(territory) 1778 + ": " 1779 + englishFile.getName(CLDRFile.TERRITORY_NAME, territory) 1780 + tenderOrNot; 1781 } 1782 } 1783 }); 1784 functionMap.put( 1785 "continentFromCurrency", 1786 new Transform<>() { 1787 @Override 1788 public String transform(String source0) { 1789 String subContinent; 1790 String territory = 1791 likelySubtags.getLikelyTerritoryFromCurrency(source0); 1792 if (currencyToTerritoryOverrides.keySet().contains(source0)) { 1793 territory = currencyToTerritoryOverrides.get(source0); 1794 } else if (territory == null) { 1795 territory = source0.substring(0, 2); 1796 } 1797 1798 if (territory.equals("ZZ")) { 1799 order = 999; 1800 subContinent = 1801 englishFile.getName(CLDRFile.TERRITORY_NAME, territory); 1802 } else { 1803 subContinent = catFromTerritory.transform(territory); 1804 } 1805 1806 String result = 1807 subContinentToContinent.get( 1808 subContinent); // the continent is the last word in the 1809 // territory representation 1810 return result; 1811 } 1812 }); 1813 functionMap.put( 1814 
"numberingSystem", 1815 new Transform<>() { 1816 @Override 1817 public String transform(String source0) { 1818 if ("latn".equals(source0)) { 1819 return ""; 1820 } 1821 String displayName = 1822 englishFile.getStringValue( 1823 "//ldml/localeDisplayNames/types/type[@key=\"numbers\"][@type=\"" 1824 + source0 1825 + "\"]"); 1826 return "using " 1827 + (displayName == null 1828 ? source0 1829 : displayName + " (" + source0 + ")"); 1830 } 1831 }); 1832 1833 functionMap.put( 1834 "datefield", 1835 new Transform<>() { 1836 private final String[] datefield = { 1837 "era", "era-short", "era-narrow", 1838 "century", "century-short", "century-narrow", 1839 "year", "year-short", "year-narrow", 1840 "quarter", "quarter-short", "quarter-narrow", 1841 "month", "month-short", "month-narrow", 1842 "week", "week-short", "week-narrow", 1843 "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow", 1844 "day", "day-short", "day-narrow", 1845 "dayOfYear", "dayOfYear-short", "dayOfYear-narrow", 1846 "weekday", "weekday-short", "weekday-narrow", 1847 "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow", 1848 "dayperiod", "dayperiod-short", "dayperiod-narrow", 1849 "zone", "zone-short", "zone-narrow", 1850 "hour", "hour-short", "hour-narrow", 1851 "minute", "minute-short", "minute-narrow", 1852 "second", "second-short", "second-narrow", 1853 "millisecond", "millisecond-short", "millisecond-narrow", 1854 "microsecond", "microsecond-short", "microsecond-narrow", 1855 "nanosecond", "nanosecond-short", "nanosecond-narrow", 1856 }; 1857 1858 @Override 1859 public String transform(String source) { 1860 order = getIndex(source, datefield); 1861 return source; 1862 } 1863 }); 1864 // //ldml/dates/fields/field[@type="%A"]/relative[@type="%A"] 1865 functionMap.put( 1866 "relativeDate", 1867 new Transform<>() { 1868 private final String[] relativeDateField = { 1869 "year", "year-short", "year-narrow", 1870 "quarter", "quarter-short", "quarter-narrow", 1871 "month", "month-short", 
"month-narrow", 1872 "week", "week-short", "week-narrow", 1873 "day", "day-short", "day-narrow", 1874 "hour", "hour-short", "hour-narrow", 1875 "minute", "minute-short", "minute-narrow", 1876 "second", "second-short", "second-narrow", 1877 "sun", "sun-short", "sun-narrow", 1878 "mon", "mon-short", "mon-narrow", 1879 "tue", "tue-short", "tue-narrow", 1880 "wed", "wed-short", "wed-narrow", 1881 "thu", "thu-short", "thu-narrow", 1882 "fri", "fri-short", "fri-narrow", 1883 "sat", "sat-short", "sat-narrow", 1884 }; 1885 private final String[] longNames = { 1886 "Year", "Year Short", "Year Narrow", 1887 "Quarter", "Quarter Short", "Quarter Narrow", 1888 "Month", "Month Short", "Month Narrow", 1889 "Week", "Week Short", "Week Narrow", 1890 "Day", "Day Short", "Day Narrow", 1891 "Hour", "Hour Short", "Hour Narrow", 1892 "Minute", "Minute Short", "Minute Narrow", 1893 "Second", "Second Short", "Second Narrow", 1894 "Sunday", "Sunday Short", "Sunday Narrow", 1895 "Monday", "Monday Short", "Monday Narrow", 1896 "Tuesday", "Tuesday Short", "Tuesday Narrow", 1897 "Wednesday", "Wednesday Short", "Wednesday Narrow", 1898 "Thursday", "Thursday Short", "Thursday Narrow", 1899 "Friday", "Friday Short", "Friday Narrow", 1900 "Saturday", "Saturday Short", "Saturday Narrow", 1901 }; 1902 1903 @Override 1904 public String transform(String source) { 1905 order = getIndex(source, relativeDateField) + 100; 1906 return "Relative " + longNames[getIndex(source, relativeDateField)]; 1907 } 1908 }); 1909 // Sorts numberSystem items (except for decimal formats). 
// &number(x): returns x unchanged. Order is the index of the text before the first '-' in
// the symbols table; any text after it (e.g. the "one" of "currencies-one") becomes the
// suborder.
functionMap.put(
        "number",
        new Transform<>() {
            private final String[] symbols = {
                "decimal",
                "group",
                "plusSign",
                "minusSign",
                "approximatelySign",
                "percentSign",
                "perMille",
                "exponential",
                "superscriptingExponent",
                "infinity",
                "nan",
                "list",
                "currencies"
            };

            @Override
            public String transform(String source) {
                final String[] pieces = source.split("-");
                order = getIndex(pieces[0], symbols);
                // e.g. "currencies-one"
                if (pieces.length > 1) {
                    suborder = new SubstringOrder(pieces[1]);
                }
                return source;
            }
        });
// &numberFormat(x): returns x unchanged; order is x's position in the list below, with
// unlisted values placed after all listed ones.
functionMap.put(
        "numberFormat",
        new Transform<>() {
            @Override
            public String transform(String source) {
                final List<String> fieldOrder =
                        Arrays.asList(
                                "standard-decimal",
                                "standard-currency",
                                "standard-currency-accounting",
                                "standard-percent",
                                "standard-scientific");

                final int position = fieldOrder.indexOf(source);
                order = position >= 0 ? position : fieldOrder.size();

                return source;
            }
        });

// &localePattern(x): returns x unchanged.
functionMap.put(
        "localePattern",
        new Transform<>() {
            @Override
            public String transform(String source) {
                // Put localeKeyTypePattern behind localePattern and
                // localeSeparator.
                if (source.equals("localeKeyTypePattern")) {
                    order = 10;
                }
                return source;
            }
        });
// &listOrder(x): returns x unchanged; order is x's index in listParts (-1 if absent).
functionMap.put(
        "listOrder",
        new Transform<>() {
            private String[] listParts = {"2", "start", "middle", "end"};

            @Override
            public String transform(String source) {
                order = getIndex(source, listParts);
                return source;
            }
        });

// &personNameSection(x): maps a person-name section key to its display header and order.
functionMap.put(
        "personNameSection",
        new Transform<>() {
            @Override
            public String transform(String source) {
                // sampleName item values in desired sort order
                final List<String> itemValues = PersonNameFormatter.SampleType.ALL_STRINGS;
                switch (source) {
                    case "NameOrder":
                        order = 0;
                        return "NameOrder for Locales";
                    case "Parameters":
                        order = 4;
                        return "Default Parameters";
                    case "AuxiliaryItems":
                        order = 10;
                        return source;
                    default:
                        break;
                }

                final String itemPrefix = "SampleName:";
                if (source.startsWith(itemPrefix)) {
                    final String itemValue = source.substring(itemPrefix.length());
                    order = 20 + itemValues.indexOf(itemValue);
                    return "SampleName Fields for Item: " + itemValue;
                }

                final String pnPrefix = "PersonName:";
                if (source.startsWith(pnPrefix)) {
                    final List<String> parts =
                            HYPHEN_SPLITTER.splitToList(source.substring(pnPrefix.length()));

                    final String nameOrder = parts.get(0);
                    if (nameOrder.equals("sorting")) {
                        order = 40;
                        return "PersonName Sorting Patterns (Usage: referring)";
                    }
                    // 30 for the main patterns, +1 for surnameFirst, +20 for monograms.
                    final int base = nameOrder.equals("surnameFirst") ? 31 : 30;
                    order = base;
                    final String nameUsage = parts.get(1);
                    if (nameUsage.equals("monogram")) {
                        order = base + 20;
                        return "PersonName Monogram Patterns for Order: " + nameOrder;
                    }
                    return "PersonName Main Patterns for Order: " + nameOrder;
                }

                order = 60;
                return source;
            }
        });
functionMap.put( 2042 "personNameOrder", 2043 new Transform<>() { 2044 @Override 2045 public String transform(String source) { 2046 // personName attribute values: each group in desired 2047 // sort order, but groups from least important to most 2048 final List<String> attrValues = 2049 Arrays.asList( 2050 "referring", 2051 "addressing", // usage values to include 2052 "formal", 2053 "informal", // formality values 2054 "long", 2055 "medium", 2056 "short"); // length values 2057 // order & length values handled in &personNameSection 2058 2059 List<String> parts = HYPHEN_SPLITTER.splitToList(source); 2060 order = 0; 2061 String attributes = ""; 2062 boolean skipReferring = false; 2063 for (String part : parts) { 2064 if (attrValues.contains(part)) { 2065 order += (1 << attrValues.indexOf(part)); 2066 // anything else like alt="variant" is at order 0 2067 if (!skipReferring || !part.contentEquals("referring")) { 2068 // Add this part to display attribute string 2069 if (attributes.length() == 0) { 2070 attributes = part; 2071 } else { 2072 attributes = attributes + "-" + part; 2073 } 2074 } 2075 } else if (part.contentEquals("sorting")) { 2076 skipReferring = true; // For order=sorting, don't display 2077 // usage=referring 2078 } 2079 } 2080 return attributes; 2081 } 2082 }); 2083 2084 functionMap.put( 2085 "sampleNameOrder", 2086 new Transform<>() { 2087 @Override 2088 public String transform(String source) { 2089 // The various nameField attribute values: each group in desired 2090 // sort order, but groups from least important to most 2091 final List<String> attrValues = 2092 Arrays.asList( 2093 "informal", 2094 "prefix", 2095 "core", // modifiers for nameField type 2096 "prefix", 2097 "given", 2098 "given2", 2099 "surname", 2100 "surname2", 2101 "suffix"); // values for nameField type 2102 2103 List<String> parts = HYPHEN_SPLITTER.splitToList(source); 2104 order = 0; 2105 for (String part : parts) { 2106 if (attrValues.contains(part)) { 2107 order += (1 << 
attrValues.indexOf(part)); 2108 } // anything else like alt="variant" is at order 0 2109 } 2110 return source; 2111 } 2112 }); 2113 2114 functionMap.put( 2115 "alphaOrder", 2116 new Transform<>() { 2117 @Override 2118 public String transform(String source) { 2119 order = 0; 2120 return source; 2121 } 2122 }); 2123 functionMap.put( 2124 "transform", 2125 new Transform<>() { 2126 Splitter commas = Splitter.on(',').trimResults(); 2127 2128 @Override 2129 public String transform(String source) { 2130 List<String> parts = commas.splitToList(source); 2131 return parts.get(1) 2132 + (parts.get(0).equals("both") ? "↔︎" : "→") 2133 + parts.get(2) 2134 + (parts.size() > 3 ? "/" + parts.get(3) : ""); 2135 } 2136 }); 2137 functionMap.put( 2138 "major", 2139 new Transform<>() { 2140 @Override 2141 public String transform(String source) { 2142 return getCharacterPageId(source).toString(); 2143 } 2144 }); 2145 functionMap.put( 2146 "minor", 2147 new Transform<>() { 2148 @Override 2149 public String transform(String source) { 2150 String minorCat = Emoji.getMinorCategory(source); 2151 order = Emoji.getEmojiMinorOrder(minorCat); 2152 return minorCat; 2153 } 2154 }); 2155 /** 2156 * Use the ordering of the emoji in getEmojiToOrder rather than alphabetic, since the 2157 * collator data won't be ready until the candidates are final. 2158 */ 2159 functionMap.put( 2160 "emoji", 2161 new Transform<>() { 2162 @Override 2163 public String transform(String source) { 2164 int dashPos = source.indexOf(' '); 2165 String emoji = source.substring(0, dashPos); 2166 order = 2167 (Emoji.getEmojiToOrder(emoji) << 1) 2168 + (source.endsWith("name") ? 
0 : 1); 2169 return source; 2170 } 2171 }); 2172 } 2173 2174 private static int getIndex(String item, String[] array) { 2175 for (int i = 0; i < array.length; i++) { 2176 if (item.equals(array[i])) { 2177 return i; 2178 } 2179 } 2180 return -1; 2181 } 2182 2183 private static String getEnglishFirstLetter(String s) { 2184 String languageOnlyPart; 2185 int underscorePos = s.indexOf("_"); 2186 if (underscorePos > 0) { 2187 languageOnlyPart = s.substring(0, underscorePos); 2188 } else { 2189 languageOnlyPart = s; 2190 } 2191 final String name = englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart); 2192 return name == null ? "?" : name.substring(0, 1).toUpperCase(); 2193 } 2194 2195 static class HyphenSplitter { 2196 String main; 2197 String extras; 2198 2199 String split(String source) { 2200 int hyphenPos = source.indexOf('-'); 2201 if (hyphenPos < 0) { 2202 main = source; 2203 extras = ""; 2204 } else { 2205 main = source.substring(0, hyphenPos); 2206 extras = source.substring(hyphenPos); 2207 } 2208 return main; 2209 } 2210 } 2211 2212 /** 2213 * This converts "functions", like &month, and sets the order. 
2214 * 2215 * @param input 2216 * @param orderIn 2217 * @return 2218 */ 2219 private static String fix(String input, int orderIn) { 2220 input = RegexLookup.replace(input, args.value); 2221 order = orderIn; 2222 suborder = null; 2223 int pos = 0; 2224 while (true) { 2225 int functionStart = input.indexOf('&', pos); 2226 if (functionStart < 0) { 2227 return adjustPageForPath(input, args.value[0] /* path */).toString(); 2228 } 2229 int functionEnd = input.indexOf('(', functionStart); 2230 int argEnd = 2231 input.indexOf( 2232 ')', functionEnd + 2); // we must insert at least one character 2233 Transform<String, String> func = 2234 functionMap.get(input.substring(functionStart + 1, functionEnd)); 2235 final String arg = input.substring(functionEnd + 1, argEnd); 2236 String temp = func.transform(arg); 2237 if (temp == null) { 2238 func.transform(arg); 2239 throw new IllegalArgumentException( 2240 "Function returns invalid results for «" + arg + "»."); 2241 } 2242 input = input.substring(0, functionStart) + temp + input.substring(argEnd + 1); 2243 pos = functionStart + temp.length(); 2244 } 2245 } 2246 2247 private static String adjustPageForPath(String input, String path) { 2248 if ("Fields".equals(input)) { 2249 return getFieldsPageId(path).toString(); 2250 } 2251 if ("Length".equals(input)) { 2252 return getLengthPageId(path).toString(); 2253 } 2254 if ("Other Units".equals(input)) { 2255 return getOtherUnitsPageId(path).toString(); 2256 } 2257 if ("Volume".equals(input)) { 2258 return getVolumePageId(path).toString(); 2259 } 2260 return input; 2261 } 2262 2263 private static PageId getFieldsPageId(String path) { 2264 XPathParts parts = XPathParts.getFrozenInstance(path); 2265 return (parts.containsElement("relative") 2266 || parts.containsElement("relativeTime") 2267 || parts.containsElement("relativePeriod")) 2268 ? 
PageId.Relative 2269 : PageId.Fields; 2270 } 2271 2272 private static Set<UnitConverter.UnitSystem> METRIC_UNITS = 2273 Set.of(UnitConverter.UnitSystem.metric, UnitConverter.UnitSystem.metric_adjacent); 2274 2275 private static Set<UnitConverter.UnitSystem> US_UNITS = 2276 Set.of(UnitConverter.UnitSystem.ussystem); 2277 2278 private static PageId getLengthPageId(String path) { 2279 final String shortUnitId = getShortUnitId(path); 2280 if (isSystemUnit(shortUnitId, METRIC_UNITS)) { 2281 return PageId.Length_Metric; 2282 } else { 2283 // Could further subdivide into US/Other with isSystemUnit(shortUnitId, US_UNITS) 2284 return PageId.Length_Other; 2285 } 2286 } 2287 2288 private static PageId getVolumePageId(String path) { 2289 final String shortUnitId = getShortUnitId(path); 2290 if (isSystemUnit(shortUnitId, METRIC_UNITS)) { 2291 return PageId.Volume_Metric; 2292 } else { 2293 return isSystemUnit(shortUnitId, US_UNITS) ? PageId.Volume_US : PageId.Volume_Other; 2294 } 2295 } 2296 2297 private static PageId getOtherUnitsPageId(String path) { 2298 String shortUnitId = getShortUnitId(path); 2299 if (isSystemUnit(shortUnitId, METRIC_UNITS)) { 2300 return shortUnitId.contains("per") 2301 ? PageId.OtherUnitsMetricPer 2302 : PageId.OtherUnitsMetric; 2303 } else { 2304 return isSystemUnit(shortUnitId, US_UNITS) 2305 ? PageId.OtherUnitsUS 2306 : PageId.OtherUnits; 2307 } 2308 } 2309 2310 private static boolean isSystemUnit( 2311 String shortUnitId, Set<UnitConverter.UnitSystem> system) { 2312 final UnitConverter uc = supplementalDataInfo.getUnitConverter(); 2313 final Set<UnitConverter.UnitSystem> systems = uc.getSystemsEnum(shortUnitId); 2314 return !Collections.disjoint(system, systems); 2315 } 2316 2317 private static String getShortUnitId(String path) { 2318 // Extract the unit from the path. 
For example, if path is 2319 // //ldml/units/unitLength[@type="narrow"]/unit[@type="volume-cubic-kilometer"]/displayName 2320 // then extract "volume-cubic-kilometer" which is the long unit id 2321 final String longUnitId = 2322 XPathParts.getFrozenInstance(path).findAttributeValue("unit", "type"); 2323 if (longUnitId == null) { 2324 throw new InternalCldrException("Missing unit in path " + path); 2325 } 2326 final UnitConverter uc = supplementalDataInfo.getUnitConverter(); 2327 // Convert, for example, "volume-cubic-kilometer" to "cubic-kilometer" 2328 return uc.getShortId(longUnitId); 2329 } 2330 2331 /** 2332 * Collect all the paths for a CLDRFile, and make sure that they have cached PathHeaders 2333 * 2334 * @param file 2335 * @return immutable set of paths in the file 2336 */ 2337 public Set<String> pathsForFile(CLDRFile file) { 2338 // make sure we cache all the path headers 2339 HashSet<String> filePaths = new HashSet<>(); 2340 file.fullIterable().forEach(filePaths::add); 2341 for (String path : filePaths) { 2342 try { 2343 fromPath(path); // call to make sure cached 2344 } catch (Throwable t) { 2345 // ... some other exception 2346 } 2347 } 2348 return Collections.unmodifiableSet(filePaths); 2349 } 2350 2351 /** 2352 * Returns those regexes that were never matched. 2353 * 2354 * @return 2355 */ 2356 public Set<String> getUnmatchedRegexes() { 2357 Map<String, RawData> outputUnmatched = new LinkedHashMap<>(); 2358 lookup.getUnmatchedPatterns(matchersFound, outputUnmatched); 2359 return outputUnmatched.keySet(); 2360 } 2361 } 2362 2363 /** 2364 * Return the territory used for the title of the Metazone page in the Survey Tool. 2365 * 2366 * @param source 2367 * @return 2368 */ 2369 public static String getMetazonePageTerritory(String source) { 2370 String result = metazoneToPageTerritory.get(source); 2371 return result == null ? 
"ZZ" : result; 2372 } 2373 2374 private static final List<String> COUNTS = 2375 Arrays.asList("displayName", "zero", "one", "two", "few", "many", "other", "per"); 2376 2377 private static Collator alphabetic; 2378 2379 private static int alphabeticCompare(String aa, String bb) { 2380 if (alphabetic == null) { 2381 initializeAlphabetic(); 2382 } 2383 return alphabetic.compare(aa, bb); 2384 } 2385 2386 private static synchronized void initializeAlphabetic() { 2387 // Lazy initialization: don't call CLDRConfig.getInstance() too early or we'll get 2388 // "CLDRConfig.getInstance() was called prior to SurveyTool setup" when called from 2389 // com.ibm.ws.microprofile.openapi.impl.core.jackson.ModelResolver._addEnumProps 2390 if (alphabetic == null) { 2391 alphabetic = CLDRConfig.getInstance().getCollatorRoot(); 2392 } 2393 } 2394 2395 /** 2396 * @deprecated use CLDRConfig.getInstance().urls() instead 2397 */ 2398 @Deprecated 2399 public enum BaseUrl { 2400 // http://st.unicode.org/smoketest/survey?_=af&strid=55053dffac611328 2401 // http://st.unicode.org/cldr-apps/survey?_=en&strid=3cd31261bf6738e1 2402 SMOKE("https://st.unicode.org/smoketest/survey"), 2403 PRODUCTION("https://st.unicode.org/cldr-apps/survey"); 2404 final String base; 2405 2406 private BaseUrl(String url) { 2407 base = url; 2408 } 2409 } 2410 2411 /** 2412 * @deprecated, use CLDRConfig.urls().forPathHeader() instead. 2413 * @param baseUrl 2414 * @param locale 2415 * @return 2416 */ 2417 public String getUrl(BaseUrl baseUrl, String locale) { 2418 return getUrl(baseUrl.base, locale); 2419 } 2420 2421 /** 2422 * @deprecated, use CLDRConfig.urls().forPathHeader() instead. 
2423 * @param baseUrl 2424 * @param locale 2425 * @return 2426 */ 2427 public String getUrl(String baseUrl, String locale) { 2428 return getUrl(baseUrl, locale, getOriginalPath()); 2429 } 2430 2431 /** 2432 * Map http://st.unicode.org/smoketest/survey to http://st.unicode.org/smoketest etc 2433 * 2434 * @param str 2435 * @return 2436 */ 2437 public static String trimLast(String str) { 2438 int n = str.lastIndexOf('/'); 2439 if (n == -1) return ""; 2440 return str.substring(0, n + 1); 2441 } 2442 2443 public static String getUrlForLocalePath(String locale, String path) { 2444 return getUrl(SURVEY_URL, locale, path); 2445 } 2446 2447 public static String getUrl(String baseUrl, String locale, String path) { 2448 return trimLast(baseUrl) + "v#/" + locale + "//" + StringId.getHexId(path); 2449 } 2450 2451 /** 2452 * @deprecated use the version with CLDRURLS instead 2453 * @param baseUrl 2454 * @param file 2455 * @param path 2456 * @return 2457 */ 2458 @Deprecated 2459 public static String getLinkedView(String baseUrl, CLDRFile file, String path) { 2460 return SECTION_LINK 2461 + PathHeader.getUrl(baseUrl, file.getLocaleID(), path) 2462 + "'><em>view</em></a>"; 2463 } 2464 2465 public static String getLinkedView(CLDRURLS urls, CLDRFile file, String path) { 2466 return SECTION_LINK + urls.forXpath(file.getLocaleID(), path) + "'><em>view</em></a>"; 2467 } 2468 2469 private static final String SURVEY_URL = CLDRConfig.getInstance().urls().base(); 2470 2471 /** 2472 * If a subdivision, return the (uppercased) territory and if suffix != null, the suffix. 2473 * Otherwise return the input as is. 2474 * 2475 * @param input 2476 * @param suffix 2477 * @return 2478 */ 2479 private static String getSubdivisionsTerritory(String input, Output<String> suffix) { 2480 String theTerritory; 2481 if (StandardCodes.LstrType.subdivision.isWellFormed(input)) { 2482 int territoryEnd = input.charAt(0) < 'A' ? 
3 : 2; 2483 theTerritory = input.substring(0, territoryEnd).toUpperCase(Locale.ROOT); 2484 if (suffix != null) { 2485 suffix.value = input.substring(territoryEnd); 2486 } 2487 } else { 2488 theTerritory = input; 2489 if (suffix != null) { 2490 suffix.value = ""; 2491 } 2492 } 2493 return theTerritory; 2494 } 2495 2496 /** 2497 * Should this path header be hidden? 2498 * 2499 * @return true to hide, else false 2500 */ 2501 public boolean shouldHide() { 2502 switch (status) { 2503 case HIDE: 2504 case DEPRECATED: 2505 return true; 2506 case READ_ONLY: 2507 case READ_WRITE: 2508 case LTR_ALWAYS: 2509 return false; 2510 default: 2511 logger.log(java.util.logging.Level.SEVERE, "Missing case for " + status); 2512 return false; 2513 } 2514 } 2515 2516 /** 2517 * Are reading and writing allowed for this path header? 2518 * 2519 * @return true if reading and writing are allowed, else false 2520 */ 2521 public boolean canReadAndWrite() { 2522 switch (status) { 2523 case READ_WRITE: 2524 case LTR_ALWAYS: 2525 return true; 2526 case HIDE: 2527 case DEPRECATED: 2528 case READ_ONLY: 2529 return false; 2530 default: 2531 logger.log(java.util.logging.Level.SEVERE, "Missing case for " + status); 2532 return false; 2533 } 2534 } 2535 2536 private static UnicodeMap<PageId> nonEmojiMap = null; 2537 2538 /** 2539 * Return the PageId for the given character 2540 * 2541 * @param cp the character as a string 2542 * @return the PageId 2543 */ 2544 private static PageId getCharacterPageId(String cp) { 2545 if (Emoji.getAllRgiNoES().contains(cp)) { 2546 return Emoji.getPageId(cp); 2547 } 2548 if (nonEmojiMap == null) { 2549 nonEmojiMap = createNonEmojiMap(); 2550 } 2551 PageId pageId = nonEmojiMap.get(cp); 2552 if (pageId == null) { 2553 throw new InternalCldrException("Failure getting character page id"); 2554 } 2555 return pageId; 2556 } 2557 2558 /** 2559 * Create the map from non-emoji characters to pages. 
Call with lazy initialization to avoid 2560 * static initialization bugs, otherwise PageId.OtherSymbols could still be null. 2561 * 2562 * @return the map from character to PageId 2563 */ 2564 private static UnicodeMap<PageId> createNonEmojiMap() { 2565 return new UnicodeMap<PageId>() 2566 .putAll(new UnicodeSet("[:P:]"), PageId.Punctuation) 2567 .putAll(new UnicodeSet("[:Sm:]"), PageId.MathSymbols) 2568 .putAll(new UnicodeSet("[^[:Sm:][:P:]]"), PageId.OtherSymbols) 2569 .freeze(); 2570 } 2571 } 2572