1 package org.unicode.cldr.util; 2 3 import java.util.Arrays; 4 import java.util.Collections; 5 import java.util.EnumMap; 6 import java.util.HashMap; 7 import java.util.HashSet; 8 import java.util.Iterator; 9 import java.util.LinkedHashMap; 10 import java.util.LinkedHashSet; 11 import java.util.List; 12 import java.util.Locale; 13 import java.util.Map; 14 import java.util.Map.Entry; 15 import java.util.Set; 16 import java.util.TreeMap; 17 import java.util.TreeSet; 18 import java.util.logging.Logger; 19 import java.util.regex.Matcher; 20 import java.util.regex.Pattern; 21 22 import org.unicode.cldr.draft.ScriptMetadata; 23 import org.unicode.cldr.draft.ScriptMetadata.Info; 24 import org.unicode.cldr.tool.LikelySubtags; 25 import org.unicode.cldr.util.RegexLookup.Finder; 26 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 27 import org.unicode.cldr.util.With.SimpleIterator; 28 29 import com.google.common.base.Splitter; 30 import com.ibm.icu.impl.Relation; 31 import com.ibm.icu.impl.Row; 32 import com.ibm.icu.lang.UCharacter; 33 import com.ibm.icu.text.Collator; 34 import com.ibm.icu.text.Transform; 35 import com.ibm.icu.util.ICUException; 36 import com.ibm.icu.util.Output; 37 import com.ibm.icu.util.ULocale; 38 39 /** 40 * Provides a mechanism for dividing up LDML paths into understandable 41 * categories, eg for the Survey tool. 42 */ 43 public class PathHeader implements Comparable<PathHeader> { 44 /** 45 * Link to a section. Commenting out the page switch for now. 
46 */ 47 public static final String SECTION_LINK = "<a " + /* "target='CLDR_ST-SECTION' "+*/"href='"; 48 static boolean UNIFORM_CONTINENTS = true; 49 static Factory factorySingleton = null; 50 51 static final boolean SKIP_ORIGINAL_PATH = true; 52 53 private final static Logger logger = Logger.getLogger(PathHeader.class.getName()); 54 55 static final Splitter HYPHEN_SPLITTER = Splitter.on('-'); 56 57 public enum Width {FULL, LONG, WIDE, SHORT, NARROW; getValue(String input)58 public static Width getValue(String input) { 59 try { 60 return Width.valueOf(input.toUpperCase(Locale.ENGLISH)); 61 } catch (RuntimeException e) { 62 e.printStackTrace(); 63 throw e; 64 } 65 } 66 @Override toString()67 public String toString() { 68 return name().toLowerCase(Locale.ENGLISH); 69 } 70 } 71 72 /** 73 * What status the survey tool should use. Can be overridden in 74 * Phase.getAction() 75 */ 76 public enum SurveyToolStatus { 77 /** 78 * Never show. 79 */ 80 DEPRECATED, 81 /** 82 * Hide. Can be overridden in Phase.getAction() 83 */ 84 HIDE, 85 /** 86 * Don't allow Change box (except TC), instead show ticket. But allow 87 * votes. Can be overridden in Phase.getAction() 88 */ 89 READ_ONLY, 90 /** 91 * Allow change box and votes. Can be overridden in Phase.getAction() 92 */ 93 READ_WRITE, 94 /** 95 * Changes are allowed as READ_WRITE, but field is always displayed as 96 * LTR, even in RTL locales (used for patterns). 97 */ 98 LTR_ALWAYS; 99 } 100 101 private static EnumNames<SectionId> SectionIdNames = new EnumNames<>(); 102 103 /** 104 * The Section for a path. Don't change these without committee buy-in. The 105 * 'name' may be 'Core_Data' and the toString is 'Core Data' toString gives 106 * the human name 107 */ 108 public enum SectionId { 109 Core_Data("Core Data"), Locale_Display_Names("Locale Display Names"), DateTime("Date & Time"), Timezones, Numbers, Currencies, Units, Characters, Misc( 110 "Miscellaneous"), BCP47, Supplemental, Special; 111 SectionId(String... 
alternateNames)112 private SectionId(String... alternateNames) { 113 SectionIdNames.add(this, alternateNames); 114 } 115 forString(String name)116 public static SectionId forString(String name) { 117 return SectionIdNames.forString(name); 118 } 119 120 @Override toString()121 public String toString() { 122 return SectionIdNames.toString(this); 123 } 124 } 125 126 private static EnumNames<PageId> PageIdNames = new EnumNames<>(); 127 private static Relation<SectionId, PageId> SectionIdToPageIds = Relation.of(new TreeMap<SectionId, Set<PageId>>(), 128 TreeSet.class); 129 130 private static class SubstringOrder implements Comparable<SubstringOrder> { 131 final String mainOrder; 132 final int order; 133 SubstringOrder(String source)134 public SubstringOrder(String source) { 135 int pos = source.lastIndexOf('-') + 1; 136 int ordering = COUNTS.indexOf(source.substring(pos)); 137 // account for digits, and "some" future proofing. 138 order = ordering < 0 139 ? source.charAt(pos) 140 : 0x10000 + ordering; 141 mainOrder = source.substring(0, pos); 142 } 143 144 @Override 145 public String toString() { 146 return "{" + mainOrder + ", " + order + "}"; 147 } 148 149 @Override 150 public int compareTo(SubstringOrder other) { 151 int diff = alphabeticCompare(mainOrder, other.mainOrder); 152 if (diff != 0) { 153 return diff; 154 } 155 return order - other.order; 156 } 157 } 158 159 /** 160 * The Page for a path (within a Section). Don't change these without 161 * committee buy-in. 
the name is for example WAsia where toString gives 162 * Western Asia 163 */ 164 public enum PageId { 165 Alphabetic_Information(SectionId.Core_Data, "Alphabetic Information"), 166 Numbering_Systems(SectionId.Core_Data, "Numbering Systems"), 167 LinguisticElements(SectionId.Core_Data, "Linguistic Elements"), 168 169 Locale_Name_Patterns(SectionId.Locale_Display_Names, "Locale Name Patterns"), 170 Languages_A_D(SectionId.Locale_Display_Names, "Languages (A-D)"), 171 Languages_E_J(SectionId.Locale_Display_Names, "Languages (E-J)"), 172 Languages_K_N(SectionId.Locale_Display_Names, "Languages (K-N)"), 173 Languages_O_S(SectionId.Locale_Display_Names, "Languages (O-S)"), 174 Languages_T_Z(SectionId.Locale_Display_Names, "Languages (T-Z)"), 175 Scripts(SectionId.Locale_Display_Names), 176 Territories(SectionId.Locale_Display_Names, "Geographic Regions"), 177 T_NAmerica(SectionId.Locale_Display_Names, "Territories (North America)"), 178 T_SAmerica( SectionId.Locale_Display_Names, "Territories (South America)"), 179 T_Africa(SectionId.Locale_Display_Names, "Territories (Africa)"), 180 T_Europe( SectionId.Locale_Display_Names, "Territories (Europe)"), 181 T_Asia(SectionId.Locale_Display_Names, "Territories (Asia)"), 182 T_Oceania( SectionId.Locale_Display_Names, "Territories (Oceania)"), 183 Locale_Variants(SectionId.Locale_Display_Names, "Locale Variants"), 184 Keys( SectionId.Locale_Display_Names), 185 186 Fields(SectionId.DateTime), 187 Gregorian(SectionId.DateTime), 188 Generic( SectionId.DateTime), 189 Buddhist(SectionId.DateTime), 190 Chinese(SectionId.DateTime), 191 Coptic( SectionId.DateTime), 192 Dangi(SectionId.DateTime), 193 Ethiopic(SectionId.DateTime), 194 Ethiopic_Amete_Alem( SectionId.DateTime, "Ethiopic-Amete-Alem"), 195 Hebrew(SectionId.DateTime), 196 Indian( SectionId.DateTime), 197 Islamic(SectionId.DateTime), 198 Japanese(SectionId.DateTime), 199 Persian( SectionId.DateTime), 200 Minguo(SectionId.DateTime), 201 202 
Timezone_Display_Patterns(SectionId.Timezones, "Timezone Display Patterns"), 203 NAmerica(SectionId.Timezones, "North America"), 204 SAmerica( SectionId.Timezones, "South America"), 205 Africa(SectionId.Timezones), 206 Europe( SectionId.Timezones), 207 Russia(SectionId.Timezones), 208 WAsia(SectionId.Timezones, "Western Asia"), 209 CAsia(SectionId.Timezones, "Central Asia"), 210 EAsia( SectionId.Timezones, "Eastern Asia"), 211 SAsia(SectionId.Timezones, "Southern Asia"), 212 SEAsia( SectionId.Timezones, "Southeast Asia"), 213 Australasia(SectionId.Timezones), 214 Antarctica( SectionId.Timezones), 215 Oceania(SectionId.Timezones), 216 UnknownT( SectionId.Timezones, "Unknown Region"), 217 Overrides(SectionId.Timezones), 218 219 Symbols( SectionId.Numbers), 220 Number_Formatting_Patterns( SectionId.Numbers, "Number Formatting Patterns"), 221 Compact_Decimal_Formatting( SectionId.Numbers, "Compact Decimal Formatting"), 222 Compact_Decimal_Formatting_Other( SectionId.Numbers, "Compact Decimal Formatting (Other Numbering Systems)"), 223 224 Measurement_Systems( SectionId.Units, "Measurement Systems"), 225 Duration( SectionId.Units), 226 Graphics( SectionId.Units), 227 Length( SectionId.Units), 228 Area( SectionId.Units), 229 Volume( SectionId.Units), 230 SpeedAcceleration( SectionId.Units, "Speed and Acceleration"), 231 MassWeight( SectionId.Units, "Mass and Weight"), 232 EnergyPower( SectionId.Units, "Energy and Power"), 233 ElectricalFrequency( SectionId.Units, "Electrical and Frequency"), 234 Weather( SectionId.Units), 235 Digital( SectionId.Units), 236 Coordinates( SectionId.Units), 237 OtherUnits( SectionId.Units, "Other Units"), 238 CompoundUnits( SectionId.Units, "Compound Units"), 239 240 241 Displaying_Lists( SectionId.Misc, "Displaying Lists"), 242 MinimalPairs(SectionId.Misc, "Minimal Pairs"), 243 PersonNameFormats(SectionId.Misc, "Person Name Formats"), 244 Transforms( SectionId.Misc), 245 246 Identity( SectionId.Special), 247 Version( SectionId.Special), 248 
Suppress( SectionId.Special), 249 Deprecated( SectionId.Special), 250 Unknown( SectionId.Special), 251 252 C_NAmerica( SectionId.Currencies, "North America (C)"), 253 //need to add (C) to differentiate from Timezone territories 254 C_SAmerica(SectionId.Currencies, "South America (C)"), 255 C_NWEurope(SectionId.Currencies, "Northern/Western Europe"), 256 C_SEEurope(SectionId.Currencies, "Southern/Eastern Europe"), 257 C_NAfrica(SectionId.Currencies, "Northern Africa"), 258 C_WAfrica(SectionId.Currencies, "Western Africa"), 259 C_MAfrica( SectionId.Currencies, "Middle Africa"), 260 C_EAfrica(SectionId.Currencies, "Eastern Africa"), 261 C_SAfrica(SectionId.Currencies, "Southern Africa"), 262 C_WAsia(SectionId.Currencies, "Western Asia (C)"), 263 C_CAsia(SectionId.Currencies, "Central Asia (C)"), 264 C_EAsia( SectionId.Currencies, "Eastern Asia (C)"), 265 C_SAsia(SectionId.Currencies, "Southern Asia (C)"), 266 C_SEAsia(SectionId.Currencies, "Southeast Asia (C)"), 267 C_Oceania(SectionId.Currencies, "Oceania (C)"), 268 C_Unknown(SectionId.Currencies, "Unknown Region (C)"), 269 270 // BCP47 271 u_Extension(SectionId.BCP47), 272 t_Extension(SectionId.BCP47), 273 274 // Supplemental 275 Alias(SectionId.Supplemental), 276 IdValidity(SectionId.Supplemental), 277 Locale(SectionId.Supplemental), 278 RegionMapping(SectionId.Supplemental), 279 WZoneMapping( SectionId.Supplemental), 280 Transform(SectionId.Supplemental), 281 Units(SectionId.Supplemental), 282 Likely(SectionId.Supplemental), 283 LanguageMatch( SectionId.Supplemental), 284 TerritoryInfo(SectionId.Supplemental), 285 LanguageInfo(SectionId.Supplemental), 286 LanguageGroup( SectionId.Supplemental), 287 Fallback(SectionId.Supplemental), 288 Gender(SectionId.Supplemental), 289 Grammar(SectionId.Supplemental), 290 Metazone(SectionId.Supplemental), 291 NumberSystem( SectionId.Supplemental), 292 Plural(SectionId.Supplemental), 293 PluralRange(SectionId.Supplemental), 294 Containment( SectionId.Supplemental), 295 
Currency(SectionId.Supplemental), 296 Calendar(SectionId.Supplemental), 297 WeekData( SectionId.Supplemental), 298 Measurement(SectionId.Supplemental), 299 Language(SectionId.Supplemental), 300 RBNF( SectionId.Supplemental), 301 Segmentation(SectionId.Supplemental), 302 DayPeriod(SectionId.Supplemental), 303 304 Category(SectionId.Characters), 305 306 // [Smileys, People, Animals & Nature, Food & Drink, Travel & Places, Activities, Objects, Symbols, Flags] 307 Smileys(SectionId.Characters, "Smileys & Emotion"), 308 People(SectionId.Characters, "People & Body"), 309 Animals_Nature(SectionId.Characters, "Animals & Nature"), 310 Food_Drink(SectionId.Characters, "Food & Drink"), 311 Travel_Places(SectionId.Characters, "Travel & Places"), 312 Activities(SectionId.Characters), 313 Objects( SectionId.Characters), 314 Symbols2(SectionId.Characters), 315 Flags(SectionId.Characters), 316 Component(SectionId.Characters), 317 Typography(SectionId.Characters), 318 ; 319 320 private final SectionId sectionId; 321 322 private PageId(SectionId sectionId, String... 
alternateNames) { 323 this.sectionId = sectionId; 324 SectionIdToPageIds.put(sectionId, this); 325 PageIdNames.add(this, alternateNames); 326 } 327 328 /** 329 * Construct a pageId given a string 330 * 331 * @param name 332 * @return 333 */ 334 public static PageId forString(String name) { 335 try { 336 return PageIdNames.forString(name); 337 } catch (Exception e) { 338 throw new ICUException("No PageId for " + name, e); 339 } 340 } 341 342 /** 343 * Returns the page id 344 * 345 * @return a page ID, such as 'Languages' 346 */ 347 @Override 348 public String toString() { 349 return PageIdNames.toString(this); 350 } 351 352 /** 353 * Get the containing section id, such as 'Code Lists' 354 * 355 * @return the containing section ID 356 */ 357 public SectionId getSectionId() { 358 return sectionId; 359 } 360 } 361 362 private final SectionId sectionId; 363 private final PageId pageId; 364 private final String header; 365 private final String code; 366 private final String originalPath; 367 private final SurveyToolStatus status; 368 369 // Used for ordering 370 private final int headerOrder; 371 private final long codeOrder; 372 private final SubstringOrder codeSuborder; 373 374 static final Pattern SEMI = PatternCache.get("\\s*;\\s*"); 375 static final Matcher ALT_MATCHER = PatternCache.get( 376 "\\[@alt=\"([^\"]*+)\"]") 377 .matcher(""); 378 379 static final Collator alphabetic = CLDRConfig.getInstance().getCollatorRoot(); 380 381 // static final RuleBasedCollator alphabetic = (RuleBasedCollator) Collator 382 // .getInstance(ULocale.ENGLISH); 383 // static { 384 // alphabetic.setNumericCollation(true); 385 // alphabetic.freeze(); 386 // } 387 388 static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance(); 389 static final Map<String, String> metazoneToContinent = supplementalDataInfo 390 .getMetazoneToContinentMap(); 391 static final StandardCodes standardCode = StandardCodes.make(); 392 static final Map<String, String> 
metazoneToPageTerritory = new HashMap<>(); 393 static { 394 Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone(); 395 for (Entry<String, Map<String, String>> metazoneEntry : metazoneToRegionToZone.entrySet()) { 396 String metazone = metazoneEntry.getKey(); 397 String worldZone = metazoneEntry.getValue().get("001"); 398 String territory = Containment.getRegionFromZone(worldZone); 399 if (territory == null) { 400 territory = "ZZ"; 401 } 402 // Russia, Antarctica => territory 403 // in Australasia, Asia, S. America => subcontinent 404 // in N. America => N. America (grouping of 3 subcontinents) 405 // in everything else => continent 406 if (territory.equals("RU") || territory.equals("AQ")) { 407 metazoneToPageTerritory.put(metazone, territory); 408 } else { 409 String continent = Containment.getContinent(territory); 410 String subcontinent = Containment.getSubcontinent(territory); 411 if (continent.equals("142")) { // Asia 412 metazoneToPageTerritory.put(metazone, subcontinent); 413 } else if (continent.equals("019")) { // Americas 414 metazoneToPageTerritory.put(metazone, subcontinent.equals("005") ? 
subcontinent : "003"); 415 } else if (subcontinent.equals("053")) { // Australasia 416 metazoneToPageTerritory.put(metazone, subcontinent); 417 } else { 418 metazoneToPageTerritory.put(metazone, continent); 419 } 420 } 421 } 422 } 423 424 /** 425 * @param section 426 * @param sectionOrder 427 * @param page 428 * @param pageOrder 429 * @param header 430 * @param headerOrder 431 * @param code 432 * @param codeOrder 433 * @param suborder 434 * @param status 435 */ 436 private PathHeader(SectionId sectionId, PageId pageId, String header, 437 int headerOrder, String code, long codeOrder, SubstringOrder suborder, SurveyToolStatus status, 438 String originalPath) { 439 this.sectionId = sectionId; 440 this.pageId = pageId; 441 this.header = header; 442 this.headerOrder = headerOrder; 443 this.code = code; 444 this.codeOrder = codeOrder; 445 this.codeSuborder = suborder; 446 this.originalPath = originalPath; 447 this.status = status; 448 } 449 450 /** 451 * Return a factory for use in creating the headers. This is cached after first use. 452 * The calls are thread-safe. Null gets the default (CLDRConfig) english file. 453 * 454 * @param englishFile 455 */ 456 public static Factory getFactory(CLDRFile englishFile) { 457 if (factorySingleton == null) { 458 if (englishFile == null) { 459 englishFile = CLDRConfig.getInstance().getEnglish(); 460 } 461 if (!englishFile.getLocaleID().equals(ULocale.ENGLISH.getBaseName())) { 462 throw new IllegalArgumentException("PathHeader's CLDRFile must be '" + 463 ULocale.ENGLISH.getBaseName() + "', but found '" + englishFile.getLocaleID() + "'"); 464 } 465 factorySingleton = new Factory(englishFile); 466 } 467 return factorySingleton; 468 } 469 470 /** 471 * Convenience method for common case. 
See {{@link #getFactory(CLDRFile)}} 472 */ 473 public static Factory getFactory() { 474 return getFactory(null); 475 } 476 477 /** 478 * @deprecated 479 */ 480 @Deprecated 481 public String getSection() { 482 return sectionId.toString(); 483 } 484 485 public SectionId getSectionId() { 486 return sectionId; 487 } 488 489 /** 490 * @deprecated 491 */ 492 @Deprecated 493 public String getPage() { 494 return pageId.toString(); 495 } 496 497 public PageId getPageId() { 498 return pageId; 499 } 500 501 public String getHeader() { 502 return header == null ? "" : header; 503 } 504 505 public String getCode() { 506 return code; 507 } 508 509 public String getHeaderCode() { 510 return getHeader() + ": " + getCode(); 511 } 512 513 public String getOriginalPath() { 514 return originalPath; 515 } 516 517 public SurveyToolStatus getSurveyToolStatus() { 518 return status; 519 } 520 521 @Override 522 public String toString() { 523 return sectionId 524 + "\t" + pageId 525 + "\t" + header // + "\t" + headerOrder 526 + "\t" + code // + "\t" + codeOrder 527 ; 528 } 529 530 @Override 531 public int compareTo(PathHeader other) { 532 // Within each section, order alphabetically if the integer orders are 533 // not different. 534 try { 535 int result; 536 if (0 != (result = sectionId.compareTo(other.sectionId))) { 537 return result; 538 } 539 if (0 != (result = pageId.compareTo(other.pageId))) { 540 return result; 541 } 542 if (0 != (result = headerOrder - other.headerOrder)) { 543 return result; 544 } 545 if (0 != (result = alphabeticCompare(header, other.header))) { 546 return result; 547 } 548 long longResult; 549 if (0 != (longResult = codeOrder - other.codeOrder)) { 550 return longResult < 0 ? -1 : longResult > 0 ? 
1 : 0; 551 } 552 if (codeSuborder != null) { // do all three cases, for transitivity 553 if (other.codeSuborder != null) { 554 if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) { 555 return result; 556 } 557 } else { 558 return 1; // if codeSuborder != null (and other.codeSuborder 559 // == null), it is greater 560 } 561 } else if (other.codeSuborder != null) { 562 return -1; // if codeSuborder == null (and other.codeSuborder != 563 // null), it is greater 564 } 565 if (0 != (result = alphabeticCompare(code, other.code))) { 566 return result; 567 } 568 if (!SKIP_ORIGINAL_PATH) { 569 if (0 != (result = alphabeticCompare(originalPath, other.originalPath))) { 570 return result; 571 } 572 } 573 return 0; 574 } catch (RuntimeException e) { 575 throw new IllegalArgumentException("Internal problem comparing " + this + " and " + other, e); 576 } 577 } 578 579 public int compareHeader(PathHeader other) { 580 int result; 581 if (0 != (result = headerOrder - other.headerOrder)) { 582 return result; 583 } 584 if (0 != (result = alphabeticCompare(header, other.header))) { 585 return result; 586 } 587 return result; 588 } 589 590 public int compareCode(PathHeader other) { 591 int result; 592 long longResult; 593 if (0 != (longResult = codeOrder - other.codeOrder)) { 594 return longResult < 0 ? -1 : longResult > 0 ? 
1 : 0; 595 } 596 if (codeSuborder != null) { // do all three cases, for transitivity 597 if (other.codeSuborder != null) { 598 if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) { 599 return result; 600 } 601 } else { 602 return 1; // if codeSuborder != null (and other.codeSuborder 603 // == null), it is greater 604 } 605 } else if (other.codeSuborder != null) { 606 return -1; // if codeSuborder == null (and other.codeSuborder != 607 // null), it is greater 608 } 609 if (0 != (result = alphabeticCompare(code, other.code))) { 610 return result; 611 } 612 return result; 613 } 614 615 @Override 616 public boolean equals(Object obj) { 617 PathHeader other; 618 try { 619 other = (PathHeader) obj; 620 } catch (Exception e) { 621 return false; 622 } 623 return sectionId == other.sectionId && pageId == other.pageId 624 && header.equals(other.header) && code.equals(other.code); 625 } 626 627 @Override 628 public int hashCode() { 629 return sectionId.hashCode() ^ pageId.hashCode() ^ header.hashCode() ^ code.hashCode(); 630 } 631 632 public static class Factory implements Transform<String, PathHeader> { 633 static final RegexLookup<RawData> lookup = RegexLookup 634 .of(new PathHeaderTransform()) 635 .setPatternTransform( 636 RegexLookup.RegexFinderTransformPath) 637 .loadFromFile( 638 PathHeader.class, 639 "data/PathHeader.txt"); 640 // synchronized with lookup 641 static final Output<String[]> args = new Output<>(); 642 // synchronized with lookup 643 static final Counter<RawData> counter = new Counter<>(); 644 // synchronized with lookup 645 static final Map<RawData, String> samples = new HashMap<>(); 646 // synchronized with lookup 647 static long order; 648 static SubstringOrder suborder; 649 650 static final Map<String, PathHeader> cache = new HashMap<>(); 651 // synchronized with cache 652 static final Map<SectionId, Map<PageId, SectionPage>> sectionToPageToSectionPage = new EnumMap<>( 653 SectionId.class); 654 static final Relation<SectionPage, String> 
sectionPageToPaths = Relation 655 .of(new TreeMap<SectionPage, Set<String>>(), 656 HashSet.class); 657 private static CLDRFile englishFile; 658 private Set<String> matchersFound = new HashSet<>(); 659 660 /** 661 * Create a factory for creating PathHeaders. 662 * 663 * @param englishFile 664 * - only sets the file (statically!) if not already set. 665 */ 666 private Factory(CLDRFile englishFile) { 667 setEnglishCLDRFileIfNotSet(englishFile); // temporary 668 } 669 670 /** 671 * Returns true if we set it, false if set before. 672 * 673 * @param englishFile2 674 * @return 675 */ 676 private static boolean setEnglishCLDRFileIfNotSet(CLDRFile englishFile2) { 677 synchronized (Factory.class) { 678 if (englishFile != null) { 679 return false; 680 } 681 englishFile = englishFile2; 682 return true; 683 } 684 } 685 686 /** 687 * Use only when trying to find unmatched patterns 688 */ 689 public void clearCache() { 690 synchronized (cache) { 691 cache.clear(); 692 } 693 } 694 695 /** 696 * Return the PathHeader for a given path. Thread-safe. 697 */ 698 public PathHeader fromPath(String path) { 699 return fromPath(path, null); 700 } 701 702 /** 703 * Return the PathHeader for a given path. Thread-safe. 704 */ 705 @Override 706 public PathHeader transform(String path) { 707 return fromPath(path, null); 708 } 709 710 /** 711 * Return the PathHeader for a given path. Thread-safe. 712 * @param failures a list of failures to add to. 
713 */ 714 public PathHeader fromPath(final String path, List<String> failures) { 715 if (path == null) { 716 throw new NullPointerException("Path cannot be null"); 717 } 718 synchronized (cache) { 719 PathHeader old = cache.get(path); 720 if (old != null) { 721 return old; 722 } 723 } 724 synchronized (lookup) { 725 String cleanPath = path; 726 // special handling for alt 727 String alt = null; 728 int altPos = cleanPath.indexOf("[@alt="); 729 if (altPos >= 0 && !cleanPath.endsWith("/symbol[@alt=\"narrow\"]")) { 730 if (ALT_MATCHER.reset(cleanPath).find()) { 731 alt = ALT_MATCHER.group(1); 732 cleanPath = cleanPath.substring(0, ALT_MATCHER.start()) 733 + cleanPath.substring(ALT_MATCHER.end()); 734 int pos = alt.indexOf("proposed"); 735 if (pos >= 0 && !path.startsWith("//ldml/collations")) { 736 alt = pos == 0 ? null : alt.substring(0, pos - 1); 737 // drop "proposed", 738 // change "xxx-proposed" to xxx. 739 } 740 } else { 741 throw new IllegalArgumentException(); 742 } 743 } 744 Output<Finder> matcherFound = new Output<>(); 745 RawData data = lookup.get(cleanPath, null, args, matcherFound, failures); 746 if (data == null) { 747 return null; 748 } 749 matchersFound.add(matcherFound.value.toString()); 750 counter.add(data, 1); 751 if (!samples.containsKey(data)) { 752 samples.put(data, cleanPath); 753 } 754 try { 755 PathHeader result = new PathHeader( 756 SectionId.forString(fix(data.section, 0)), 757 PageId.forString(fix(data.page, 0)), 758 fix(data.header, data.headerOrder), 759 (int)order, // only valid after call to fix. TODO, make 760 // this cleaner 761 fix(data.code + (alt == null ? 
"" : ("-" + alt)), data.codeOrder), 762 order, // only valid after call to fix 763 suborder, 764 data.status, 765 path); 766 synchronized (cache) { 767 PathHeader old = cache.get(path); 768 if (old == null) { 769 cache.put(path, result); 770 } else { 771 result = old; 772 } 773 Map<PageId, SectionPage> pageToPathHeaders = sectionToPageToSectionPage 774 .get(result.sectionId); 775 if (pageToPathHeaders == null) { 776 sectionToPageToSectionPage.put(result.sectionId, pageToPathHeaders = new EnumMap<>(PageId.class)); 777 } 778 SectionPage sectionPage = pageToPathHeaders.get(result.pageId); 779 if (sectionPage == null) { 780 sectionPage = new SectionPage(result.sectionId, result.pageId); 781 pageToPathHeaders.put(result.pageId, sectionPage); 782 } 783 sectionPageToPaths.put(sectionPage, path); 784 } 785 return result; 786 } catch (Exception e) { 787 throw new IllegalArgumentException( 788 "Probably mismatch in Page/Section enum, or too few capturing groups in regex for " + path, 789 e); 790 } 791 } 792 } 793 794 private static class SectionPage implements Comparable<SectionPage> { 795 private final SectionId sectionId; 796 private final PageId pageId; 797 SectionPage(SectionId sectionId, PageId pageId)798 public SectionPage(SectionId sectionId, PageId pageId) { 799 this.sectionId = sectionId; 800 this.pageId = pageId; 801 } 802 803 @Override compareTo(SectionPage other)804 public int compareTo(SectionPage other) { 805 // Within each section, order alphabetically if the integer 806 // orders are 807 // not different. 
808 int result; 809 if (0 != (result = sectionId.compareTo(other.sectionId))) { 810 return result; 811 } 812 if (0 != (result = pageId.compareTo(other.pageId))) { 813 return result; 814 } 815 return 0; 816 } 817 818 @Override equals(Object obj)819 public boolean equals(Object obj) { 820 PathHeader other; 821 try { 822 other = (PathHeader) obj; 823 } catch (Exception e) { 824 return false; 825 } 826 return sectionId == other.sectionId && pageId == other.pageId; 827 } 828 829 @Override hashCode()830 public int hashCode() { 831 return sectionId.hashCode() ^ pageId.hashCode(); 832 } 833 @Override toString()834 public String toString() { 835 return sectionId + " > " + pageId; 836 } 837 } 838 839 /** 840 * Returns a set of paths currently associated with the given section 841 * and page. 842 * <p> 843 * <b>Warning:</b> 844 * <ol> 845 * <li>The set may not be complete for a cldrFile unless all of paths in 846 * the file have had fromPath called. And this includes getExtraPaths(). 847 * </li> 848 * <li>The set may include paths that have no value in the current 849 * cldrFile.</li> 850 * <li>The set may be empty, if the section/page aren't valid.</li> 851 * </ol> 852 * Thread-safe. 853 * 854 * @target a collection where the paths are to be returned. 855 */ getCachedPaths(SectionId sectionId, PageId page)856 public static Set<String> getCachedPaths(SectionId sectionId, PageId page) { 857 Set<String> target = new HashSet<>(); 858 synchronized (cache) { 859 Map<PageId, SectionPage> pageToSectionPage = sectionToPageToSectionPage 860 .get(sectionId); 861 if (pageToSectionPage == null) { 862 return target; 863 } 864 SectionPage sectionPage = pageToSectionPage.get(page); 865 if (sectionPage == null) { 866 return target; 867 } 868 Set<String> set = sectionPageToPaths.getAll(sectionPage); 869 target.addAll(set); 870 } 871 return target; 872 } 873 874 /** 875 * Return the Sections and Pages that are in defined, for display in 876 * menus. Both are ordered. 
877 */ getSectionIdsToPageIds()878 public static Relation<SectionId, PageId> getSectionIdsToPageIds() { 879 SectionIdToPageIds.freeze(); // just in case 880 return SectionIdToPageIds; 881 } 882 883 /** 884 * Return paths that have the designated section and page. 885 * 886 * @param sectionId 887 * @param pageId 888 * @param file 889 */ filterCldr(SectionId sectionId, PageId pageId, CLDRFile file)890 public Iterable<String> filterCldr(SectionId sectionId, PageId pageId, CLDRFile file) { 891 return new FilteredIterable(sectionId, pageId, file); 892 } 893 894 /** 895 * Return the names for Sections and Pages that are defined, for display 896 * in menus. Both are ordered. 897 * 898 * @deprecated Use getSectionIdsToPageIds 899 */ 900 @Deprecated getSectionsToPages()901 public static LinkedHashMap<String, Set<String>> getSectionsToPages() { 902 LinkedHashMap<String, Set<String>> sectionsToPages = new LinkedHashMap<>(); 903 for (PageId pageId : PageId.values()) { 904 String sectionId2 = pageId.getSectionId().toString(); 905 Set<String> pages = sectionsToPages.get(sectionId2); 906 if (pages == null) { 907 sectionsToPages.put(sectionId2, pages = new LinkedHashSet<>()); 908 } 909 pages.add(pageId.toString()); 910 } 911 return sectionsToPages; 912 } 913 914 /** 915 * @deprecated, use the filterCldr with the section/page ids. 
*/
        public Iterable<String> filterCldr(String section, String page, CLDRFile file) {
            return new FilteredIterable(section, page, file);
        }

        /**
         * Iterates over the paths of a file, returning only those whose
         * PathHeader has the given section and page.
         */
        private class FilteredIterable implements Iterable<String>, SimpleIterator<String> {
            private final SectionId sectionId;
            private final PageId pageId;
            private final Iterator<String> fileIterator;

            FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file) {
                this.sectionId = sectionId;
                this.pageId = pageId;
                this.fileIterator = file.fullIterable().iterator();
            }

            public FilteredIterable(String section, String page, CLDRFile file) {
                this(SectionId.forString(section), PageId.forString(page), file);
            }

            @Override
            public Iterator<String> iterator() {
                return With.toIterator(this);
            }

            /**
             * @return the next matching path, or null when the file's paths
             *         are exhausted
             */
            @Override
            public String next() {
                while (fileIterator.hasNext()) {
                    String path = fileIterator.next();
                    PathHeader pathHeader = fromPath(path);
                    // fromPath returns null for a path that no pattern
                    // matches; such a path belongs to no section/page.
                    if (pathHeader != null
                        && sectionId == pathHeader.sectionId && pageId == pathHeader.pageId) {
                        return path;
                    }
                }
                return null;
            }
        }

        /**
         * Assigns each distinct item an order number equal to the number of
         * distinct items seen before it. Registering a new item also clears
         * the chained "lower" orderings.
         */
        private static class ChronologicalOrder {
            private Map<String, Integer> map = new HashMap<>();
            private String item;
            private int order;
            private ChronologicalOrder toClear;

            ChronologicalOrder(ChronologicalOrder toClear) {
                this.toClear = toClear;
            }

            int getOrder() {
                return order;
            }

            /**
             * Sets the current item and updates the current order.
             *
             * @param itemToOrder the item; a leading "*" is stripped and
             *        leaves the current order unchanged
             * @return the item, without any leading "*"
             */
            public String set(String itemToOrder) {
                if (itemToOrder.startsWith("*")) {
                    item = itemToOrder.substring(1, itemToOrder.length());
                    return item; // keep old order
                }
                item = itemToOrder;
                Integer old = map.get(item);
                if (old != null) {
                    order = old.intValue();
                } else {
                    order = map.size();
                    map.put(item, order);
                    clearLower();
                }
                return item;
            }

            // Resets every ordering chained below this one.
            private void clearLower() {
                if (toClear != null) {
                    toClear.map.clear();
                    toClear.order = 0;
                    toClear.clearLower();
                }
            }
        }

        /**
         * One parsed entry of the lookup data: semicolon-separated section,
         * page, header, code and an optional status (default READ_WRITE).
         */
        static class RawData {
            static ChronologicalOrder codeOrdering = new ChronologicalOrder(null);
            static ChronologicalOrder headerOrdering = new ChronologicalOrder(codeOrdering);

            public RawData(String source) {
                String[] split = SEMI.split(source);
                section = split[0];
                // HACK
                if (section.equals("Timezones") && split[1].equals("Indian")) {
                    page = "Indian2";
                } else {
                    page = split[1];
                }

                header = headerOrdering.set(split[2]);
                headerOrder = headerOrdering.getOrder();

                code = codeOrdering.set(split[3]);
                codeOrder = codeOrdering.getOrder();

                status = split.length < 5 ? SurveyToolStatus.READ_WRITE : SurveyToolStatus.valueOf(split[4]);
            }

            public final String section;
            public final String page;
            public final String header;
            public final int headerOrder;
            public final String code;
            public final int codeOrder;
            public final SurveyToolStatus status;

            @Override
            public String toString() {
                return section + "\t"
                    + page + "\t"
                    + header + "\t" + headerOrder + "\t"
                    + code + "\t" + codeOrder + "\t"
                    + status;
            }
        }

        /**
         * Converts a data string into a RawData.
         */
        static class PathHeaderTransform implements Transform<String, RawData> {
            @Override
            public RawData transform(String source) {
                return new RawData(source);
            }
        }

        /**
         * Internal data, for testing and debugging.
         *
         * @deprecated
         */
        @Deprecated
        public class CounterData extends Row.R4<String, RawData, String, String> {
            public CounterData(String a, RawData b, String c) {
                super(a, b, c == null ? "no sample" : c, c == null ? "no sample" : fromPath(c)
                    .toString());
            }
        }

        /**
         * Get the internal data, for testing and debugging.
         *
         * @deprecated
         */
        @Deprecated
        public Counter<CounterData> getInternalCounter() {
            synchronized (lookup) {
                Counter<CounterData> result = new Counter<>();
                for (Map.Entry<Finder, RawData> foo : lookup) {
                    Finder finder = foo.getKey();
                    RawData data = foo.getValue();
                    long count = counter.get(data);
                    result.add(new CounterData(finder.toString(), data, samples.get(data)), count);
                }
                return result;
            }
        }

        static Map<String, Transform<String, String>> functionMap = new HashMap<>();
        static String[] months = { "Jan", "Feb", "Mar",
            "Apr", "May", "Jun",
            "Jul", "Aug", "Sep",
            "Oct", "Nov", "Dec",
            "Und" };
        static List<String> days = Arrays.asList("sun", "mon",
            "tue", "wed", "thu",
            "fri", "sat");
        static List<String> unitOrder = DtdData.unitOrder.getOrder();
        static final MapComparator<String> dayPeriods = new MapComparator<String>().add(
            "am", "pm", "midnight", "noon",
            "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2").freeze();
        // static Map<String, String> likelySubtags =
        // supplementalDataInfo.getLikelySubtags();
        static LikelySubtags likelySubtags = new LikelySubtags();
        static HyphenSplitter hyphenSplitter = new HyphenSplitter();
        static Transform<String, String> catFromTerritory;
        static Transform<String, String> catFromTimezone;
        static {
            // Put any new functions used in PathHeader.txt in here.
            // To change the order of items within a section or heading, set
            // order/suborder to be the relative position of the current item.
1097 functionMap.put("month", new Transform<String, String>() { 1098 @Override 1099 public String transform(String source) { 1100 int m = Integer.parseInt(source); 1101 order = m; 1102 return months[m - 1]; 1103 } 1104 }); 1105 functionMap.put("count", new Transform<String, String>() { 1106 @Override 1107 public String transform(String source) { 1108 suborder = new SubstringOrder(source); 1109 return source; 1110 } 1111 }); 1112 functionMap.put("count2", new Transform<String, String>() { 1113 @Override 1114 public String transform(String source) { 1115 int pos = source.indexOf('-'); 1116 source = pos + source.substring(pos); 1117 suborder = new SubstringOrder(source); // make 10000-... 1118 // into 5- 1119 return source; 1120 } 1121 }); 1122 functionMap.put("currencySymbol", new Transform<String, String>() { 1123 @Override 1124 public String transform(String source) { 1125 order = 901; 1126 if (source.endsWith("narrow")) { 1127 order = 902; 1128 } 1129 if (source.endsWith("variant")) { 1130 order = 903; 1131 } 1132 return source; 1133 } 1134 }); 1135 // &unitCount($1-$3-$4), where $1 is length, $2 is count, $3 is case (optional) 1136 // but also 1137 // &unitCount($1-$3-$5-$4), where $5 is case, $4 is gender — notice order change 1138 functionMap.put("unitCount", new Transform<String, String>() { 1139 @Override 1140 public String transform(String source) { 1141 List<String> parts = HYPHEN_SPLITTER.splitToList(source); 1142 if (parts.size() == 1) { 1143 return source; 1144 } 1145 int lengthNumber = Width.getValue(parts.get(0)).ordinal(); 1146 int type = 0; 1147 int rest = 0; 1148 switch(parts.get(1)) { 1149 case "gender": 1150 type = 0; 1151 break; 1152 case "displayName": 1153 type = 1; 1154 break; 1155 case "per": 1156 type = 2; 1157 break; 1158 default: 1159 type = 3; 1160 int countNumber = (parts.size() > 1 ? Count.valueOf(parts.get(1)) : Count.other).ordinal(); 1161 int caseNumber = (parts.size() > 2 ? 
GrammarInfo.CaseValues.valueOf(parts.get(2)) : GrammarInfo.CaseValues.nominative).ordinal(); 1162 int genderNumber = GrammarInfo.GenderValues.neuter.ordinal(); 1163 if (parts.size() > 3) { 1164 String genderPart = parts.get(3); 1165 if (!genderPart.equals("dgender")) { 1166 genderNumber = GrammarInfo.GenderValues.valueOf(genderPart).ordinal(); 1167 } 1168 type = 4; 1169 } 1170 rest = (countNumber << 16) | (caseNumber << 8) | genderNumber; 1171 break; 1172 } 1173 order = (type << 28) | (lengthNumber << 24) | rest; 1174 1175 // String[] unitLengths = { "long", "short", "narrow" }; 1176 // int pos = 9; 1177 // for (int i = 0; i < unitLengths.length; i++) { 1178 // if (source.startsWith(unitLengths[i])) { 1179 // pos = i; 1180 // continue; 1181 // } 1182 // } 1183 // order = pos; 1184 // suborder = new SubstringOrder(pos + "-" + source); // 1185 return source; 1186 } 1187 }); 1188 1189 functionMap.put("pluralNumber", new Transform<String, String>() { 1190 @Override 1191 public String transform(String source) { 1192 order = GrammarInfo.PluralValues.valueOf(source).ordinal(); 1193 return source; 1194 } 1195 }); 1196 1197 functionMap.put("caseNumber", new Transform<String, String>() { 1198 @Override 1199 public String transform(String source) { 1200 order = GrammarInfo.CaseValues.valueOf(source).ordinal(); 1201 return source; 1202 } 1203 }); 1204 1205 functionMap.put("genderNumber", new Transform<String, String>() { 1206 @Override 1207 public String transform(String source) { 1208 order = GrammarInfo.GenderValues.valueOf(source).ordinal(); 1209 return source; 1210 } 1211 }); 1212 1213 functionMap.put("day", new Transform<String, String>() { 1214 @Override 1215 public String transform(String source) { 1216 int m = days.indexOf(source); 1217 order = m; 1218 return source; 1219 } 1220 }); 1221 functionMap.put("dayPeriod", new Transform<String, String>() { 1222 @Override 1223 public String transform(String source) { 1224 try { 1225 order = dayPeriods.getNumericOrder(source); 
1226 } catch (Exception e) { 1227 // if an old item is tried, like "evening", this will fail. 1228 // so that old data still works, hack this. 1229 order = Math.abs(source.hashCode() << 16); 1230 } 1231 return source; 1232 } 1233 }); 1234 functionMap.put("calendar", new Transform<String, String>() { 1235 Map<String, String> fixNames = Builder.with(new HashMap<String, String>()) 1236 .put("islamicc", "Islamic Civil") 1237 .put("roc", "Minguo") 1238 .put("Ethioaa", "Ethiopic Amete Alem") 1239 .put("Gregory", "Gregorian") 1240 .put("iso8601", "ISO 8601") 1241 .freeze(); 1242 1243 @Override 1244 public String transform(String source) { 1245 String result = fixNames.get(source); 1246 return result != null ? result : UCharacter.toTitleCase(source, null); 1247 } 1248 }); 1249 1250 functionMap.put("calField", new Transform<String, String>() { 1251 @Override 1252 public String transform(String source) { 1253 String[] fields = source.split(":", 3); 1254 order = 0; 1255 final List<String> widthValues = Arrays.asList( 1256 "wide", "abbreviated", "short", "narrow"); 1257 final List<String> calendarFieldValues = Arrays.asList( 1258 "Eras", 1259 "Quarters", 1260 "Months", 1261 "Days", 1262 "DayPeriods", 1263 "Formats"); 1264 final List<String> calendarFormatTypes = Arrays.asList( 1265 "Standard", 1266 "Flexible", 1267 "Intervals"); 1268 final List<String> calendarContextTypes = Arrays.asList( 1269 "none", 1270 "format", 1271 "stand-alone"); 1272 final List<String> calendarFormatSubtypes = Arrays.asList( 1273 "date", 1274 "time", 1275 "time12", 1276 "time24", 1277 "dateTime", 1278 "fallback"); 1279 1280 Map<String, String> fixNames = Builder.with(new HashMap<String, String>()) 1281 .put("DayPeriods", "Day Periods") 1282 .put("format", "Formatting") 1283 .put("stand-alone", "Standalone") 1284 .put("none", "") 1285 .put("date", "Date Formats") 1286 .put("time", "Time Formats") 1287 .put("time12", "12 Hour Time Formats") 1288 .put("time24", "24 Hour Time Formats") 1289 
.put("dateTime", "Date & Time Combination Formats") 1290 .freeze(); 1291 1292 if (calendarFieldValues.contains(fields[0])) { 1293 order = calendarFieldValues.indexOf(fields[0]) * 100; 1294 } else { 1295 order = calendarFieldValues.size() * 100; 1296 } 1297 1298 if (fields[0].equals("Formats")) { 1299 if (calendarFormatTypes.contains(fields[1])) { 1300 order += calendarFormatTypes.indexOf(fields[1]) * 10; 1301 } else { 1302 order += calendarFormatTypes.size() * 10; 1303 } 1304 if (calendarFormatSubtypes.contains(fields[2])) { 1305 order += calendarFormatSubtypes.indexOf(fields[2]); 1306 } else { 1307 order += calendarFormatSubtypes.size(); 1308 } 1309 } else { 1310 if (widthValues.contains(fields[1])) { 1311 order += widthValues.indexOf(fields[1]) * 10; 1312 } else { 1313 order += widthValues.size() * 10; 1314 } 1315 if (calendarContextTypes.contains(fields[2])) { 1316 order += calendarContextTypes.indexOf(fields[2]); 1317 } else { 1318 order += calendarContextTypes.size(); 1319 } 1320 } 1321 1322 String[] fixedFields = new String[fields.length]; 1323 for (int i = 0; i < fields.length; i++) { 1324 String s = fixNames.get(fields[i]); 1325 fixedFields[i] = s != null ? s : fields[i]; 1326 } 1327 1328 return fixedFields[0] + 1329 " - " + fixedFields[1] + 1330 (fixedFields[2].length() > 0 ? 
" - " + fixedFields[2] : ""); 1331 } 1332 }); 1333 1334 functionMap.put("titlecase", new Transform<String, String>() { 1335 @Override 1336 public String transform(String source) { 1337 return UCharacter.toTitleCase(source, null); 1338 } 1339 }); 1340 functionMap.put("categoryFromScript", new Transform<String, String>() { 1341 @Override 1342 public String transform(String source) { 1343 String script = hyphenSplitter.split(source); 1344 Info info = ScriptMetadata.getInfo(script); 1345 if (info == null) { 1346 info = ScriptMetadata.getInfo("Zzzz"); 1347 } 1348 order = 100 - info.idUsage.ordinal(); 1349 return info.idUsage.name; 1350 } 1351 }); 1352 functionMap.put("categoryFromKey", new Transform<String, String>() { 1353 Map<String, String> fixNames = Builder.with(new HashMap<String, String>()) 1354 .put("cf", "Currency Format") 1355 .put("em", "Emoji Presentation") 1356 .put("fw", "First Day of Week") 1357 .put("lb", "Line Break") 1358 .put("hc", "Hour Cycle") 1359 .put("ms", "Measurement System") 1360 .put("ss", "Sentence Break Suppressions") 1361 .freeze(); 1362 1363 @Override 1364 public String transform(String source) { 1365 String fixedName = fixNames.get(source); 1366 return fixedName != null ? 
fixedName : source; 1367 } 1368 }); 1369 functionMap.put("languageSection", new Transform<String, String>() { 1370 char[] languageRangeStartPoints = { 'A', 'E', 'K', 'O', 'T' }; 1371 char[] languageRangeEndPoints = { 'D', 'J', 'N', 'S', 'Z' }; 1372 1373 @Override 1374 public String transform(String source0) { 1375 char firstLetter = getEnglishFirstLetter(source0).charAt(0); 1376 for (int i = 0; i < languageRangeStartPoints.length; i++) { 1377 if (firstLetter >= languageRangeStartPoints[i] && firstLetter <= languageRangeEndPoints[i]) { 1378 return "Languages (" + Character.toUpperCase(languageRangeStartPoints[i]) + "-" + Character.toUpperCase(languageRangeEndPoints[i]) 1379 + ")"; 1380 } 1381 } 1382 return "Languages"; 1383 } 1384 }); 1385 functionMap.put("firstLetter", new Transform<String, String>() { 1386 @Override 1387 public String transform(String source0) { 1388 return getEnglishFirstLetter(source0); 1389 } 1390 }); 1391 functionMap.put("languageSort", new Transform<String, String>() { 1392 @Override 1393 public String transform(String source0) { 1394 String languageOnlyPart; 1395 int underscorePos = source0.indexOf("_"); 1396 if (underscorePos > 0) { 1397 languageOnlyPart = source0.substring(0, underscorePos); 1398 } else { 1399 languageOnlyPart = source0; 1400 } 1401 1402 return englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart) + " \u25BA " + source0; 1403 } 1404 }); 1405 functionMap.put("scriptFromLanguage", new Transform<String, String>() { 1406 @Override 1407 public String transform(String source0) { 1408 String language = hyphenSplitter.split(source0); 1409 String script = likelySubtags.getLikelyScript(language); 1410 if (script == null) { 1411 script = likelySubtags.getLikelyScript(language); 1412 } 1413 String scriptName = englishFile.getName(CLDRFile.SCRIPT_NAME, script); 1414 return "Languages in " + (script.equals("Hans") || script.equals("Hant") ? "Han Script" 1415 : scriptName.endsWith(" Script") ? 
scriptName 1416 : scriptName + " Script"); 1417 } 1418 }); 1419 functionMap.put("categoryFromTerritory", 1420 catFromTerritory = new Transform<String, String>() { 1421 @Override 1422 public String transform(String source) { 1423 String territory = getSubdivisionsTerritory(source, null); 1424 String container = Containment.getContainer(territory); 1425 order = Containment.getOrder(territory); 1426 return englishFile.getName(CLDRFile.TERRITORY_NAME, container); 1427 } 1428 }); 1429 functionMap.put("territorySection", new Transform<String, String>() { 1430 final Set<String> specialRegions = new HashSet<>(Arrays.asList("EZ", "EU", "QO", "UN", "ZZ")); 1431 1432 @Override 1433 public String transform(String source0) { 1434 // support subdivisions 1435 String theTerritory = getSubdivisionsTerritory(source0, null); 1436 try { 1437 if (specialRegions.contains(theTerritory) 1438 || theTerritory.charAt(0) < 'A' && Integer.valueOf(theTerritory) > 0) { 1439 return "Geographic Regions"; 1440 } 1441 } catch (NumberFormatException ex) { 1442 } 1443 String theContinent = Containment.getContinent(theTerritory); 1444 String theSubContinent; 1445 switch (theContinent) { // was Integer.valueOf 1446 case "019": // Americas - For the territorySection, we just group North America & South America 1447 final String subcontinent = Containment.getSubcontinent(theTerritory); 1448 theSubContinent = subcontinent.equals("005") ? 
"005" : "003"; // was Integer.valueOf(subcontinent) == 5 1449 return "Territories (" + englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent) + ")"; 1450 case "001": 1451 case "ZZ": 1452 return "Geographic Regions"; // not in containment 1453 default: 1454 return "Territories (" + englishFile.getName(CLDRFile.TERRITORY_NAME, theContinent) + ")"; 1455 } 1456 } 1457 }); 1458 functionMap.put("categoryFromTimezone", 1459 catFromTimezone = new Transform<String, String>() { 1460 @Override 1461 public String transform(String source0) { 1462 String territory = Containment.getRegionFromZone(source0); 1463 if (territory == null) { 1464 territory = "ZZ"; 1465 } 1466 return catFromTerritory.transform(territory); 1467 } 1468 }); 1469 functionMap.put("timeZonePage", new Transform<String, String>() { 1470 Set<String> singlePageTerritories = new HashSet<>(Arrays.asList("AQ", "RU", "ZZ")); 1471 1472 @Override 1473 public String transform(String source0) { 1474 String theTerritory = Containment.getRegionFromZone(source0); 1475 if (theTerritory == null || theTerritory == "001") { 1476 theTerritory = "ZZ"; 1477 } 1478 if (singlePageTerritories.contains(theTerritory)) { 1479 return englishFile.getName(CLDRFile.TERRITORY_NAME, theTerritory); 1480 } 1481 String theContinent = Containment.getContinent(theTerritory); 1482 final String subcontinent = Containment.getSubcontinent(theTerritory); 1483 String theSubContinent; 1484 switch (Integer.valueOf(theContinent)) { 1485 case 9: // Oceania - For the timeZonePage, we group Australasia on one page, and the rest of Oceania on the other. 1486 try { 1487 theSubContinent = subcontinent.equals("053") ? 
"053" : "009"; // was Integer.valueOf(subcontinent) == 53 1488 } catch (NumberFormatException ex) { 1489 theSubContinent = "009"; 1490 } 1491 return englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent); 1492 case 19: // Americas - For the timeZonePage, we just group North America & South America 1493 theSubContinent = Integer.valueOf(subcontinent) == 5 ? "005" : "003"; 1494 return englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent); 1495 case 142: // Asia 1496 return englishFile.getName(CLDRFile.TERRITORY_NAME, subcontinent); 1497 default: 1498 return englishFile.getName(CLDRFile.TERRITORY_NAME, theContinent); 1499 } 1500 } 1501 }); 1502 1503 functionMap.put("timezoneSorting", new Transform<String, String>() { 1504 @Override 1505 public String transform(String source) { 1506 final List<String> codeValues = Arrays.asList( 1507 "generic-long", 1508 "generic-short", 1509 "standard-long", 1510 "standard-short", 1511 "daylight-long", 1512 "daylight-short"); 1513 if (codeValues.contains(source)) { 1514 order = codeValues.indexOf(source); 1515 } else { 1516 order = codeValues.size(); 1517 } 1518 return source; 1519 } 1520 }); 1521 1522 functionMap.put("tzdpField", new Transform<String, String>() { 1523 @Override 1524 public String transform(String source) { 1525 Map<String, String> fieldNames = Builder.with(new HashMap<String, String>()) 1526 .put("regionFormat", "Region Format - Generic") 1527 .put("regionFormat-standard", "Region Format - Standard") 1528 .put("regionFormat-daylight", "Region Format - Daylight") 1529 .put("gmtFormat", "GMT Format") 1530 .put("hourFormat", "GMT Hours/Minutes Format") 1531 .put("gmtZeroFormat", "GMT Zero Format") 1532 .put("fallbackFormat", "Location Fallback Format") 1533 .freeze(); 1534 final List<String> fieldOrder = Arrays.asList( 1535 "regionFormat", 1536 "regionFormat-standard", 1537 "regionFormat-daylight", 1538 "gmtFormat", 1539 "hourFormat", 1540 "gmtZeroFormat", 1541 "fallbackFormat"); 1542 1543 if 
(fieldOrder.contains(source)) { 1544 order = fieldOrder.indexOf(source); 1545 } else { 1546 order = fieldOrder.size(); 1547 } 1548 1549 String result = fieldNames.get(source); 1550 return result == null ? source : result; 1551 } 1552 }); 1553 functionMap.put("unit", new Transform<String, String>() { 1554 @Override 1555 public String transform(String source) { 1556 int m = unitOrder.indexOf(source); 1557 order = m; 1558 return source.substring(source.indexOf('-') + 1); 1559 } 1560 }); 1561 1562 functionMap.put("numericSort", new Transform<String, String>() { 1563 // Probably only works well for small values, like -5 through +4. 1564 @Override 1565 public String transform(String source) { 1566 Integer pos = Integer.valueOf(source) + 5; 1567 suborder = new SubstringOrder(pos.toString()); 1568 return source; 1569 } 1570 }); 1571 1572 functionMap.put("metazone", new Transform<String, String>() { 1573 1574 @Override 1575 public String transform(String source) { 1576 if (PathHeader.UNIFORM_CONTINENTS) { 1577 String container = getMetazonePageTerritory(source); 1578 order = Containment.getOrder(container); 1579 return englishFile.getName(CLDRFile.TERRITORY_NAME, container); 1580 } else { 1581 String continent = metazoneToContinent.get(source); 1582 if (continent == null) { 1583 continent = "UnknownT"; 1584 } 1585 return continent; 1586 } 1587 } 1588 }); 1589 1590 Object[][] ctto = { 1591 { "BUK", "MM" }, 1592 { "CSD", "RS" }, 1593 { "CSK", "CZ" }, 1594 { "DDM", "DE" }, 1595 { "EUR", "ZZ" }, 1596 { "RHD", "ZW" }, 1597 { "SUR", "RU" }, 1598 { "TPE", "TL" }, 1599 { "XAG", "ZZ" }, 1600 { "XAU", "ZZ" }, 1601 { "XBA", "ZZ" }, 1602 { "XBB", "ZZ" }, 1603 { "XBC", "ZZ" }, 1604 { "XBD", "ZZ" }, 1605 { "XDR", "ZZ" }, 1606 { "XEU", "ZZ" }, 1607 { "XFO", "ZZ" }, 1608 { "XFU", "ZZ" }, 1609 { "XPD", "ZZ" }, 1610 { "XPT", "ZZ" }, 1611 { "XRE", "ZZ" }, 1612 { "XSU", "ZZ" }, 1613 { "XTS", "ZZ" }, 1614 { "XUA", "ZZ" }, 1615 { "XXX", "ZZ" }, 1616 { "YDD", "YE" }, 1617 { "YUD", "RS" }, 1618 { 
"YUM", "RS" }, 1619 { "YUN", "RS" }, 1620 { "YUR", "RS" }, 1621 { "ZRN", "CD" }, 1622 { "ZRZ", "CD" }, 1623 }; 1624 1625 Object[][] sctc = { 1626 { "Northern America", "North America (C)" }, 1627 { "Central America", "North America (C)" }, 1628 { "Caribbean", "North America (C)" }, 1629 { "South America", "South America (C)" }, 1630 { "Northern Africa", "Northern Africa" }, 1631 { "Western Africa", "Western Africa" }, 1632 { "Middle Africa", "Middle Africa" }, 1633 { "Eastern Africa", "Eastern Africa" }, 1634 { "Southern Africa", "Southern Africa" }, 1635 { "Europe", "Northern/Western Europe" }, 1636 { "Northern Europe", "Northern/Western Europe" }, 1637 { "Western Europe", "Northern/Western Europe" }, 1638 { "Eastern Europe", "Southern/Eastern Europe" }, 1639 { "Southern Europe", "Southern/Eastern Europe" }, 1640 { "Western Asia", "Western Asia (C)" }, 1641 { "Central Asia", "Central Asia (C)" }, 1642 { "Eastern Asia", "Eastern Asia (C)" }, 1643 { "Southern Asia", "Southern Asia (C)" }, 1644 { "Southeast Asia", "Southeast Asia (C)" }, 1645 { "Australasia", "Oceania (C)" }, 1646 { "Melanesia", "Oceania (C)" }, 1647 { "Micronesian Region", "Oceania (C)" }, // HACK 1648 { "Polynesia", "Oceania (C)" }, 1649 { "Unknown Region", "Unknown Region (C)" }, 1650 }; 1651 1652 final Map<String, String> currencyToTerritoryOverrides = CldrUtility.asMap(ctto); 1653 final Map<String, String> subContinentToContinent = CldrUtility.asMap(sctc); 1654 final Set<String> fundCurrencies = new HashSet<>(Arrays.asList("CHE", "CHW", "CLF", "COU", "ECV", "MXV", "USN", "USS", "UYI", "XEU", "ZAL")); 1655 final Set<String> offshoreCurrencies = new HashSet<>(Arrays.asList("CNH")); 1656 // TODO: Put this into supplementalDataInfo ? 
1657 1658 functionMap.put("categoryFromCurrency", new Transform<String, String>() { 1659 @Override 1660 public String transform(String source0) { 1661 String tenderOrNot = ""; 1662 String territory = likelySubtags.getLikelyTerritoryFromCurrency(source0); 1663 if (territory == null) { 1664 String tag; 1665 if (fundCurrencies.contains(source0)) { 1666 tag = " (fund)"; 1667 } else if (offshoreCurrencies.contains(source0)) { 1668 tag = " (offshore)"; 1669 } else { 1670 tag = " (old)"; 1671 } 1672 tenderOrNot = ": " + source0 + tag; 1673 } 1674 if (currencyToTerritoryOverrides.keySet().contains(source0)) { 1675 territory = currencyToTerritoryOverrides.get(source0); 1676 } else if (territory == null) { 1677 territory = source0.substring(0, 2); 1678 } 1679 1680 if (territory.equals("ZZ")) { 1681 order = 999; 1682 return englishFile.getName(CLDRFile.TERRITORY_NAME, territory) + ": " + source0; 1683 } else { 1684 return catFromTerritory.transform(territory) + ": " 1685 + englishFile.getName(CLDRFile.TERRITORY_NAME, territory) 1686 + tenderOrNot; 1687 } 1688 } 1689 }); 1690 functionMap.put("continentFromCurrency", new Transform<String, String>() { 1691 @Override 1692 public String transform(String source0) { 1693 String subContinent; 1694 String territory = likelySubtags.getLikelyTerritoryFromCurrency(source0); 1695 if (currencyToTerritoryOverrides.keySet().contains(source0)) { 1696 territory = currencyToTerritoryOverrides.get(source0); 1697 } else if (territory == null) { 1698 territory = source0.substring(0, 2); 1699 } 1700 1701 if (territory.equals("ZZ")) { 1702 order = 999; 1703 subContinent = englishFile.getName(CLDRFile.TERRITORY_NAME, territory); 1704 } else { 1705 subContinent = catFromTerritory.transform(territory); 1706 } 1707 1708 String result = subContinentToContinent.get(subContinent); //the continent is the last word in the territory representation 1709 return result; 1710 } 1711 }); 1712 functionMap.put("numberingSystem", new Transform<String, String>() { 
1713 @Override 1714 public String transform(String source0) { 1715 if ("latn".equals(source0)) { 1716 return ""; 1717 } 1718 String displayName = englishFile.getStringValue("//ldml/localeDisplayNames/types/type[@key=\"numbers\"][@type=\"" 1719 + source0 + "\"]"); 1720 return "using " + (displayName == null ? source0 : displayName + " (" + source0 + ")"); 1721 } 1722 }); 1723 1724 functionMap.put("datefield", new Transform<String, String>() { 1725 private final String[] datefield = { 1726 "era", "era-short", "era-narrow", 1727 "century", "century-short", "century-narrow", 1728 "year", "year-short", "year-narrow", 1729 "quarter", "quarter-short", "quarter-narrow", 1730 "month", "month-short", "month-narrow", 1731 "week", "week-short", "week-narrow", 1732 "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow", 1733 "day", "day-short", "day-narrow", 1734 "dayOfYear", "dayOfYear-short", "dayOfYear-narrow", 1735 "weekday", "weekday-short", "weekday-narrow", 1736 "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow", 1737 "dayperiod", "dayperiod-short", "dayperiod-narrow", 1738 "zone", "zone-short", "zone-narrow", 1739 "hour", "hour-short", "hour-narrow", 1740 "minute", "minute-short", "minute-narrow", 1741 "second", "second-short", "second-narrow", 1742 "millisecond", "millisecond-short", "millisecond-narrow", 1743 "microsecond", "microsecond-short", "microsecond-narrow", 1744 "nanosecond", "nanosecond-short", "nanosecond-narrow", 1745 1746 }; 1747 1748 @Override 1749 public String transform(String source) { 1750 order = getIndex(source, datefield); 1751 return source; 1752 } 1753 }); 1754 // //ldml/dates/fields/field[@type="%A"]/relative[@type="%A"] 1755 functionMap.put("relativeDate", new Transform<String, String>() { 1756 private final String[] relativeDateField = { 1757 "year", "year-short", "year-narrow", 1758 "quarter", "quarter-short", "quarter-narrow", 1759 "month", "month-short", "month-narrow", 1760 "week", "week-short", "week-narrow", 1761 "day", 
"day-short", "day-narrow", 1762 "hour", "hour-short", "hour-narrow", 1763 "minute", "minute-short", "minute-narrow", 1764 "second", "second-short", "second-narrow", 1765 "sun", "sun-short", "sun-narrow", 1766 "mon", "mon-short", "mon-narrow", 1767 "tue", "tue-short", "tue-narrow", 1768 "wed", "wed-short", "wed-narrow", 1769 "thu", "thu-short", "thu-narrow", 1770 "fri", "fri-short", "fri-narrow", 1771 "sat", "sat-short", "sat-narrow", 1772 }; 1773 private final String[] longNames = { 1774 "Year", "Year Short", "Year Narrow", 1775 "Quarter", "Quarter Short", "Quarter Narrow", 1776 "Month", "Month Short", "Month Narrow", 1777 "Week", "Week Short", "Week Narrow", 1778 "Day", "Day Short", "Day Narrow", 1779 "Hour", "Hour Short", "Hour Narrow", 1780 "Minute", "Minute Short", "Minute Narrow", 1781 "Second", "Second Short", "Second Narrow", 1782 "Sunday", "Sunday Short", "Sunday Narrow", 1783 "Monday", "Monday Short", "Monday Narrow", 1784 "Tuesday", "Tuesday Short", "Tuesday Narrow", 1785 "Wednesday", "Wednesday Short", "Wednesday Narrow", 1786 "Thursday", "Thursday Short", "Thursday Narrow", 1787 "Friday", "Friday Short", "Friday Narrow", 1788 "Saturday", "Saturday Short", "Saturday Narrow", 1789 }; 1790 1791 @Override 1792 public String transform(String source) { 1793 order = getIndex(source, relativeDateField) + 100; 1794 return "Relative " + longNames[getIndex(source, relativeDateField)]; 1795 } 1796 }); 1797 // Sorts numberSystem items (except for decimal formats). 1798 functionMap.put("number", new Transform<String, String>() { 1799 private final String[] symbols = { "decimal", "group", 1800 "plusSign", "minusSign", "approximatelySign", 1801 "percentSign", "perMille", 1802 "exponential", "superscriptingExponent", 1803 "infinity", "nan", "list", "currencies" 1804 }; 1805 1806 @Override 1807 public String transform(String source) { 1808 String[] parts = source.split("-"); 1809 order = getIndex(parts[0], symbols); 1810 // e.g. 
"currencies-one" 1811 if (parts.length > 1) { 1812 suborder = new SubstringOrder(parts[1]); 1813 } 1814 return source; 1815 } 1816 }); 1817 functionMap.put("numberFormat", new Transform<String, String>() { 1818 @Override 1819 public String transform(String source) { 1820 final List<String> fieldOrder = Arrays.asList( 1821 "standard-decimal", 1822 "standard-currency", 1823 "standard-currency-accounting", 1824 "standard-percent", 1825 "standard-scientific"); 1826 1827 if (fieldOrder.contains(source)) { 1828 order = fieldOrder.indexOf(source); 1829 } else { 1830 order = fieldOrder.size(); 1831 } 1832 1833 return source; 1834 } 1835 }); 1836 1837 functionMap.put("localePattern", new Transform<String, String>() { 1838 @Override 1839 public String transform(String source) { 1840 // Put localeKeyTypePattern behind localePattern and 1841 // localeSeparator. 1842 if (source.equals("localeKeyTypePattern")) { 1843 order = 10; 1844 } 1845 return source; 1846 } 1847 }); 1848 functionMap.put("listOrder", new Transform<String, String>() { 1849 private String[] listParts = { "2", "start", "middle", "end" }; 1850 1851 @Override 1852 public String transform(String source) { 1853 order = getIndex(source, listParts); 1854 return source; 1855 } 1856 }); 1857 1858 functionMap.put("personNameSection", new Transform<String, String>() { 1859 @Override 1860 public String transform(String source) { 1861 // sampleName item values in desired sort order 1862 final List<String> itemValues = Arrays.asList("givenOnly", "givenSurnameOnly", "given12Surname", "full"); 1863 // personName attribute values: each group in desired 1864 // sort order, but groups from least important to most 1865 final List<String> pnAttrValues = Arrays.asList( 1866 "long", "medium", "short", // length values 1867 "givenFirst", "surnameFirst", "sorting"); // order values 1868 1869 if (source.equals("NameOrder")) { 1870 order = 0; 1871 return "NameOrder for Locales"; 1872 } 1873 if (source.equals("AuxiliaryItems")) { 1874 
order = 10; 1875 return source; 1876 } 1877 String itemPrefix = "SampleName:"; 1878 if (source.startsWith(itemPrefix)) { 1879 String itemValue = source.substring(itemPrefix.length()); 1880 order = 20 + itemValues.indexOf(itemValue); 1881 return "SampleName Fields for Item: " + itemValue; 1882 } 1883 String pnPrefix = "PersonName:"; 1884 if (source.startsWith(pnPrefix)) { 1885 String attrValues = source.substring(pnPrefix.length()); 1886 List<String> parts = HYPHEN_SPLITTER.splitToList(attrValues); 1887 order = 30; 1888 for (String part: parts) { 1889 if (pnAttrValues.contains(part)) { 1890 order += (1 << pnAttrValues.indexOf(part)); 1891 } 1892 } 1893 attrValues = attrValues.replace("sorting-", "sorting/index-"); 1894 return "PersonName Patterns for Order-Length: " + attrValues; 1895 } 1896 order = 40; 1897 return source; 1898 } 1899 }); 1900 1901 functionMap.put("personNameOrder", new Transform<String, String>() { 1902 @Override 1903 public String transform(String source) { 1904 // personName attribute values: each group in desired 1905 // sort order, but groups from least important to most 1906 final List<String> attrValues = Arrays.asList( 1907 "formal", "informal", //formality values 1908 "referring", "addressing", "monogram"); // usage values 1909 // order & length values handled in &personNameSection 1910 1911 List<String> parts = HYPHEN_SPLITTER.splitToList(source); 1912 order = 0; 1913 for (String part: parts) { 1914 if (attrValues.contains(part)) { 1915 order += (1 << attrValues.indexOf(part)); 1916 } // anything else like alt="variant" is at order 0 1917 } 1918 return source; 1919 } 1920 }); 1921 1922 functionMap.put("sampleNameOrder", new Transform<String, String>() { 1923 @Override 1924 public String transform(String source) { 1925 // The various nameField attribute values: each group in desired 1926 // sort order, but groups from least important to most 1927 final List<String> attrValues = Arrays.asList( 1928 "informal", "prefix", "core", // modifiers 
for nameField type 1929 "prefix", "given", "given2", "surname", "surname2", "suffix"); // values for nameField type 1930 1931 List<String> parts = HYPHEN_SPLITTER.splitToList(source); 1932 order = 0; 1933 for (String part: parts) { 1934 if (attrValues.contains(part)) { 1935 order += (1 << attrValues.indexOf(part)); 1936 } // anything else like alt="variant" is at order 0 1937 } 1938 return source; 1939 } 1940 }); 1941 1942 functionMap.put("alphaOrder", new Transform<String, String>() { 1943 @Override 1944 public String transform(String source) { 1945 order = 0; 1946 return source; 1947 } 1948 }); 1949 functionMap.put("transform", new Transform<String, String>() { 1950 Splitter commas = Splitter.on(',').trimResults(); 1951 1952 @Override 1953 public String transform(String source) { 1954 List<String> parts = commas.splitToList(source); 1955 return parts.get(1) 1956 + (parts.get(0).equals("both") ? "↔︎" : "→") 1957 + parts.get(2) 1958 + (parts.size() > 3 ? "/" + parts.get(3) : ""); 1959 } 1960 }); 1961 functionMap.put("major", new Transform<String, String>() { 1962 @Override 1963 public String transform(String source) { 1964 String major = Emoji.getMajorCategory(source); 1965 // check that result is reasonable by running through PageId. 
1966 switch(major) { 1967 default: 1968 PageId pageId2 = PageId.forString(major); 1969 if (pageId2.getSectionId() != SectionId.Characters) { 1970 if (pageId2 == PageId.Symbols) { 1971 pageId2 = PageId.Symbols2; 1972 } 1973 } 1974 return pageId2.toString(); 1975 case "Smileys & People": 1976 String minorCat = Emoji.getMinorCategory(source); 1977 if (minorCat.equals("skin-tone") || minorCat.equals("hair-style")) { 1978 return PageId.Component.toString(); 1979 } else if (!minorCat.contains("face")) { 1980 return PageId.People.toString(); 1981 } else { 1982 return PageId.Smileys.toString(); 1983 } 1984 } 1985 } 1986 }); 1987 functionMap.put("minor", new Transform<String, String>() { 1988 @Override 1989 public String transform(String source) { 1990 String minorCat = Emoji.getMinorCategory(source); 1991 order = Emoji.getEmojiMinorOrder(minorCat); 1992 return minorCat; 1993 } 1994 }); 1995 /** 1996 * Use the ordering of the emoji in getEmojiToOrder rather than alphabetic, 1997 * since the collator data won't be ready until the candidates are final. 1998 */ 1999 functionMap.put("emoji", new Transform<String, String>() { 2000 @Override 2001 public String transform(String source) { 2002 int dashPos = source.indexOf(' '); 2003 String emoji = source.substring(0, dashPos); 2004 order = (Emoji.getEmojiToOrder(emoji) << 1) + (source.endsWith("name") ? 
0 : 1); 2005 return source; 2006 } 2007 }); 2008 2009 } 2010 2011 private static int getIndex(String item, String[] array) { 2012 for (int i = 0; i < array.length; i++) { 2013 if (item.equals(array[i])) { 2014 return i; 2015 } 2016 } 2017 return -1; 2018 } 2019 2020 private static String getEnglishFirstLetter(String s) { 2021 String languageOnlyPart; 2022 int underscorePos = s.indexOf("_"); 2023 if (underscorePos > 0) { 2024 languageOnlyPart = s.substring(0, underscorePos); 2025 } else { 2026 languageOnlyPart = s; 2027 } 2028 final String name = englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart); 2029 return name == null ? "?" : name.substring(0, 1).toUpperCase(); 2030 } 2031 2032 static class HyphenSplitter { 2033 String main; 2034 String extras; 2035 2036 String split(String source) { 2037 int hyphenPos = source.indexOf('-'); 2038 if (hyphenPos < 0) { 2039 main = source; 2040 extras = ""; 2041 } else { 2042 main = source.substring(0, hyphenPos); 2043 extras = source.substring(hyphenPos); 2044 } 2045 return main; 2046 } 2047 } 2048 2049 /** 2050 * This converts "functions", like &month, and sets the order. 
2051 * 2052 * @param input 2053 * @param order 2054 * @return 2055 */ 2056 private static String fix(String input, int orderIn) { 2057 input = RegexLookup.replace(input, args.value); 2058 order = orderIn; 2059 suborder = null; 2060 int pos = 0; 2061 while (true) { 2062 int functionStart = input.indexOf('&', pos); 2063 if (functionStart < 0) { 2064 return input; 2065 } 2066 int functionEnd = input.indexOf('(', functionStart); 2067 int argEnd = input.indexOf(')', functionEnd+2); // we must insert at least one character 2068 Transform<String, String> func = functionMap.get(input.substring(functionStart + 1, 2069 functionEnd)); 2070 final String arg = input.substring(functionEnd + 1, argEnd); 2071 String temp = func.transform(arg); 2072 if (temp == null) { 2073 func.transform(arg); 2074 throw new IllegalArgumentException("Function returns invalid results for «" + arg + "»."); 2075 } 2076 input = input.substring(0, functionStart) + temp + input.substring(argEnd + 1); 2077 pos = functionStart + temp.length(); 2078 } 2079 } 2080 2081 /** 2082 * Collect all the paths for a CLDRFile, and make sure that they have 2083 * cached PathHeaders 2084 * 2085 * @param file 2086 * @return immutable set of paths in the file 2087 */ 2088 public Set<String> pathsForFile(CLDRFile file) { 2089 // make sure we cache all the path headers 2090 HashSet<String> filePaths = new HashSet<>(); 2091 file.fullIterable().forEach(filePaths::add); 2092 for (String path : filePaths) { 2093 try { 2094 fromPath(path); // call to make sure cached 2095 } catch (Throwable t) { 2096 // ... some other exception 2097 } 2098 } 2099 return Collections.unmodifiableSet(filePaths); 2100 } 2101 2102 /** 2103 * Returns those regexes that were never matched. 
2104 * @return 2105 */ 2106 public Set<String> getUnmatchedRegexes() { 2107 Map<String, RawData> outputUnmatched = new LinkedHashMap<>(); 2108 lookup.getUnmatchedPatterns(matchersFound, outputUnmatched); 2109 return outputUnmatched.keySet(); 2110 } 2111 2112 public String getRegexInfo() { 2113 return lookup.toString(); 2114 } 2115 } 2116 2117 /** 2118 * Return the territory used for the title of the Metazone page in the 2119 * Survey Tool. 2120 * 2121 * @param source 2122 * @return 2123 */ 2124 public static String getMetazonePageTerritory(String source) { 2125 String result = metazoneToPageTerritory.get(source); 2126 return result == null ? "ZZ" : result; 2127 } 2128 2129 private static final List<String> COUNTS = Arrays.asList("displayName", "zero", "one", "two", "few", "many", "other", "per"); 2130 2131 private static int alphabeticCompare(String aa, String bb) { 2132 // A frozen Collator is thread-safe. 2133 return alphabetic.compare(aa, bb); 2134 } 2135 2136 public enum BaseUrl { 2137 //http://st.unicode.org/smoketest/survey?_=af&strid=55053dffac611328 2138 //http://st.unicode.org/cldr-apps/survey?_=en&strid=3cd31261bf6738e1 2139 SMOKE("http://st.unicode.org/smoketest/survey"), PRODUCTION("http://st.unicode.org/cldr-apps/survey"); 2140 final String base; 2141 2142 private BaseUrl(String url) { 2143 base = url; 2144 } 2145 } 2146 2147 /** 2148 * @deprecated, use CLDRConfig.urls().forPathHeader() instead. 2149 * @param baseUrl 2150 * @param locale 2151 * @return 2152 */ 2153 public String getUrl(BaseUrl baseUrl, String locale) { 2154 return getUrl(baseUrl.base, locale); 2155 } 2156 2157 /** 2158 * @deprecated, use CLDRConfig.urls().forPathHeader() instead. 
2159 * @param baseUrl 2160 * @param locale 2161 * @return 2162 */ 2163 public String getUrl(String baseUrl, String locale) { 2164 return getUrl(baseUrl, locale, getOriginalPath()); 2165 } 2166 2167 /** 2168 * Map http://st.unicode.org/smoketest/survey to http://st.unicode.org/smoketest etc 2169 * @param str 2170 * @return 2171 */ 2172 public static String trimLast(String str) { 2173 int n = str.lastIndexOf('/'); 2174 if (n == -1) return ""; 2175 return str.substring(0, n + 1); 2176 } 2177 2178 public static String getUrlForLocalePath(String locale, String path) { 2179 return getUrl(SURVEY_URL, locale, path); 2180 } 2181 2182 public String getUrlForLocalePath(String locale) { 2183 return getUrl(SURVEY_URL, locale, originalPath); 2184 } 2185 2186 public static String getUrl(String baseUrl, String locale, String path) { 2187 return trimLast(baseUrl) + "v#/" + locale + "//" + StringId.getHexId(path); 2188 } 2189 2190 private static String SURVEY_URL = CLDRConfig.getInstance().getProperty("CLDR_SURVEY_URL", "http://st.unicode.org/cldr-apps/survey"); 2191 2192 public static String getLinkedView(String baseUrl, CLDRFile file, String path) { 2193 return SECTION_LINK + PathHeader.getUrl(baseUrl, file.getLocaleID(), path) + "'><em>view</em></a>"; 2194 } 2195 2196 /** 2197 * If a subdivision, return the (uppercased) territory and if suffix != null, the suffix. Otherwise return the input as is. 2198 * @param input 2199 * @param suffix 2200 * @return 2201 */ 2202 private static String getSubdivisionsTerritory(String input, Output<String> suffix) { 2203 String theTerritory; 2204 if (StandardCodes.LstrType.subdivision.isWellFormed(input)) { 2205 int territoryEnd = input.charAt(0) < 'A' ? 
3 : 2; 2206 theTerritory = input.substring(0, territoryEnd).toUpperCase(Locale.ROOT); 2207 if (suffix != null) { 2208 suffix.value = input.substring(territoryEnd); 2209 } 2210 } else { 2211 theTerritory = input; 2212 if (suffix != null) { 2213 suffix.value = ""; 2214 } 2215 } 2216 return theTerritory; 2217 } 2218 2219 /** 2220 * Should this path header be hidden? 2221 * 2222 * @return true to hide, else false 2223 */ 2224 public boolean shouldHide() { 2225 switch (status) { 2226 case HIDE: 2227 case DEPRECATED: 2228 return true; 2229 case READ_ONLY: 2230 case READ_WRITE: 2231 case LTR_ALWAYS: 2232 return false; 2233 default: 2234 logger.log(java.util.logging.Level.SEVERE, "Missing case for " + status); 2235 return false; 2236 } 2237 } 2238 2239 /** 2240 * Are reading and writing allowed for this path header? 2241 * 2242 * @return true if reading and writing are allowed, else false 2243 */ 2244 public boolean canReadAndWrite() { 2245 switch (status) { 2246 case READ_WRITE: 2247 case LTR_ALWAYS: 2248 return true; 2249 case HIDE: 2250 case DEPRECATED: 2251 case READ_ONLY: 2252 return false; 2253 default: 2254 logger.log(java.util.logging.Level.SEVERE, "Missing case for " + status); 2255 return false; 2256 } 2257 } 2258 } 2259