1 package org.unicode.cldr.util; 2 3 import java.util.Arrays; 4 import java.util.Collections; 5 import java.util.EnumMap; 6 import java.util.HashMap; 7 import java.util.HashSet; 8 import java.util.Iterator; 9 import java.util.LinkedHashMap; 10 import java.util.LinkedHashSet; 11 import java.util.List; 12 import java.util.Locale; 13 import java.util.Map; 14 import java.util.Map.Entry; 15 import java.util.Set; 16 import java.util.TreeMap; 17 import java.util.TreeSet; 18 import java.util.regex.Matcher; 19 import java.util.regex.Pattern; 20 21 import org.unicode.cldr.draft.ScriptMetadata; 22 import org.unicode.cldr.draft.ScriptMetadata.Info; 23 import org.unicode.cldr.tool.LikelySubtags; 24 import org.unicode.cldr.util.RegexLookup.Finder; 25 import org.unicode.cldr.util.With.SimpleIterator; 26 27 import com.google.common.base.Splitter; 28 import com.ibm.icu.dev.util.CollectionUtilities; 29 import com.ibm.icu.impl.Relation; 30 import com.ibm.icu.impl.Row; 31 import com.ibm.icu.lang.UCharacter; 32 import com.ibm.icu.text.Collator; 33 import com.ibm.icu.text.Transform; 34 import com.ibm.icu.util.ICUException; 35 import com.ibm.icu.util.Output; 36 import com.ibm.icu.util.ULocale; 37 38 /** 39 * Provides a mechanism for dividing up LDML paths into understandable 40 * categories, eg for the Survey tool. 41 */ 42 public class PathHeader implements Comparable<PathHeader> { 43 /** 44 * Link to a section. Commenting out the page switch for now. 45 */ 46 public static final String SECTION_LINK = "<a " + /* "target='CLDR_ST-SECTION' "+*/"href='"; 47 static boolean UNIFORM_CONTINENTS = true; 48 static Factory factorySingleton = null; 49 50 static final boolean SKIP_ORIGINAL_PATH = true; 51 52 /** 53 * What status the survey tool should use. Can be overridden in 54 * Phase.getAction() 55 */ 56 public enum SurveyToolStatus { 57 /** 58 * Never show. 59 */ 60 DEPRECATED, 61 /** 62 * Hide. 
Can be overridden in Phase.getAction() 63 */ 64 HIDE, 65 /** 66 * Don't allow Change box (except TC), instead show ticket. But allow 67 * votes. Can be overridden in Phase.getAction() 68 */ 69 READ_ONLY, 70 /** 71 * Allow change box and votes. Can be overridden in Phase.getAction() 72 */ 73 READ_WRITE, 74 /** 75 * Changes are allowed as READ_WRITE, but field is always displayed as 76 * LTR, even in RTL locales (used for patterns). 77 */ 78 LTR_ALWAYS 79 } 80 81 private static EnumNames<SectionId> SectionIdNames = new EnumNames<SectionId>(); 82 83 /** 84 * The Section for a path. Don't change these without committee buy-in. The 85 * 'name' may be 'Core_Data' and the toString is 'Core Data' toString gives 86 * the human name 87 */ 88 public enum SectionId { 89 Core_Data("Core Data"), Locale_Display_Names("Locale Display Names"), DateTime("Date & Time"), Timezones, Numbers, Currencies, Units, Characters, Misc( 90 "Miscellaneous"), BCP47, Supplemental, Special; 91 SectionId(String... alternateNames)92 private SectionId(String... alternateNames) { 93 SectionIdNames.add(this, alternateNames); 94 } 95 forString(String name)96 public static SectionId forString(String name) { 97 return SectionIdNames.forString(name); 98 } 99 toString()100 public String toString() { 101 return SectionIdNames.toString(this); 102 } 103 } 104 105 private static EnumNames<PageId> PageIdNames = new EnumNames<PageId>(); 106 private static Relation<SectionId, PageId> SectionIdToPageIds = Relation.of(new TreeMap<SectionId, Set<PageId>>(), 107 TreeSet.class); 108 109 private static class SubstringOrder implements Comparable<SubstringOrder> { 110 final String mainOrder; 111 final int order; 112 SubstringOrder(String source)113 public SubstringOrder(String source) { 114 int pos = source.lastIndexOf('-') + 1; 115 int ordering = COUNTS.indexOf(source.substring(pos)); 116 // account for digits, and "some" future proofing. 117 order = ordering < 0 118 ? 
source.charAt(pos) 119 : 0x10000 + ordering; 120 mainOrder = source.substring(0, pos); 121 } 122 123 @Override 124 public String toString() { 125 return "{" + mainOrder + ", " + order + "}"; 126 } 127 128 @Override 129 public int compareTo(SubstringOrder other) { 130 int diff = alphabeticCompare(mainOrder, other.mainOrder); 131 if (diff != 0) { 132 return diff; 133 } 134 return order - other.order; 135 } 136 } 137 138 /** 139 * The Page for a path (within a Section). Don't change these without 140 * committee buy-in. the name is for example WAsia where toString gives 141 * Western Asia 142 */ 143 public enum PageId { 144 Alphabetic_Information(SectionId.Core_Data, "Alphabetic Information"), Numbering_Systems(SectionId.Core_Data, 145 "Numbering Systems"), Locale_Name_Patterns(SectionId.Locale_Display_Names, "Locale Name Patterns"), Languages_A_D(SectionId.Locale_Display_Names, 146 "Languages (A-D)"), Languages_E_J(SectionId.Locale_Display_Names, "Languages (E-J)"), Languages_K_N(SectionId.Locale_Display_Names, 147 "Languages (K-N)"), Languages_O_S(SectionId.Locale_Display_Names, "Languages (O-S)"), Languages_T_Z(SectionId.Locale_Display_Names, 148 "Languages (T-Z)"), Scripts(SectionId.Locale_Display_Names), Territories(SectionId.Locale_Display_Names, 149 "Geographic Regions"), T_NAmerica(SectionId.Locale_Display_Names, "Territories (North America)"), T_SAmerica( 150 SectionId.Locale_Display_Names, 151 "Territories (South America)"), T_Africa(SectionId.Locale_Display_Names, "Territories (Africa)"), T_Europe( 152 SectionId.Locale_Display_Names, 153 "Territories (Europe)"), T_Asia(SectionId.Locale_Display_Names, "Territories (Asia)"), T_Oceania( 154 SectionId.Locale_Display_Names, 155 "Territories (Oceania)"), Locale_Variants(SectionId.Locale_Display_Names, "Locale Variants"), Keys( 156 SectionId.Locale_Display_Names), Fields(SectionId.DateTime), Gregorian(SectionId.DateTime), Generic( 157 SectionId.DateTime), Buddhist(SectionId.DateTime), Chinese(SectionId.DateTime), 
Coptic( 158 SectionId.DateTime), Dangi(SectionId.DateTime), Ethiopic(SectionId.DateTime), Ethiopic_Amete_Alem( 159 SectionId.DateTime, "Ethiopic-Amete-Alem"), Hebrew(SectionId.DateTime), Indian( 160 SectionId.DateTime), Islamic(SectionId.DateTime), Japanese(SectionId.DateTime), Persian( 161 SectionId.DateTime), Minguo(SectionId.DateTime), Timezone_Display_Patterns(SectionId.Timezones, 162 "Timezone Display Patterns"), NAmerica(SectionId.Timezones, "North America"), SAmerica( 163 SectionId.Timezones, "South America"), Africa(SectionId.Timezones), Europe( 164 SectionId.Timezones), Russia(SectionId.Timezones), WAsia(SectionId.Timezones, 165 "Western Asia"), CAsia(SectionId.Timezones, "Central Asia"), EAsia( 166 SectionId.Timezones, 167 "Eastern Asia"), SAsia(SectionId.Timezones, "Southern Asia"), SEAsia( 168 SectionId.Timezones, 169 "Southeast Asia"), Australasia(SectionId.Timezones), Antarctica( 170 SectionId.Timezones), Oceania(SectionId.Timezones), UnknownT( 171 SectionId.Timezones, 172 "Unknown Region"), Overrides(SectionId.Timezones), Symbols( 173 SectionId.Numbers), MinimalPairs(SectionId.Numbers, 174 "Minimal Pairs"), Number_Formatting_Patterns( 175 SectionId.Numbers, 176 "Number Formatting Patterns"), Compact_Decimal_Formatting( 177 SectionId.Numbers, 178 "Compact Decimal Formatting"), Compact_Decimal_Formatting_Other( 179 SectionId.Numbers, 180 "Compact Decimal Formatting (Other Numbering Systems)"), Measurement_Systems( 181 SectionId.Units, 182 "Measurement Systems"), Duration( 183 SectionId.Units), Length( 184 SectionId.Units), Area( 185 SectionId.Units), Volume( 186 SectionId.Units), SpeedAcceleration( 187 SectionId.Units, 188 "Speed and Acceleration"), MassWeight( 189 SectionId.Units, 190 "Mass and Weight"), EnergyPower( 191 SectionId.Units, 192 "Energy and Power"), ElectricalFrequency( 193 SectionId.Units, 194 "Electrical and Frequency"), Weather( 195 SectionId.Units), Digital( 196 SectionId.Units), Coordinates( 197 SectionId.Units), OtherUnits( 198 
SectionId.Units, 199 "Other Units"), CompoundUnits( 200 SectionId.Units, 201 "Compound Units"), Displaying_Lists( 202 SectionId.Misc, 203 "Displaying Lists"), LinguisticElements( 204 SectionId.Misc, 205 "Linguistic Elements"), Transforms( 206 SectionId.Misc), Identity( 207 SectionId.Special), Version( 208 SectionId.Special), Suppress( 209 SectionId.Special), Deprecated( 210 SectionId.Special), Unknown( 211 SectionId.Special), C_NAmerica( 212 SectionId.Currencies, 213 "North America (C)"), //need to add (C) to differentiate from Timezone territories 214 C_SAmerica(SectionId.Currencies, "South America (C)"), C_NWEurope(SectionId.Currencies, "Northern/Western Europe"), C_SEEurope(SectionId.Currencies, 215 "Southern/Eastern Europe"), C_NAfrica(SectionId.Currencies, "Northern Africa"), C_WAfrica(SectionId.Currencies, "Western Africa"), C_MAfrica( 216 SectionId.Currencies, "Middle Africa"), C_EAfrica(SectionId.Currencies, "Eastern Africa"), C_SAfrica(SectionId.Currencies, 217 "Southern Africa"), C_WAsia(SectionId.Currencies, "Western Asia (C)"), C_CAsia(SectionId.Currencies, "Central Asia (C)"), C_EAsia( 218 SectionId.Currencies, "Eastern Asia (C)"), C_SAsia(SectionId.Currencies, "Southern Asia (C)"), C_SEAsia(SectionId.Currencies, 219 "Southeast Asia (C)"), C_Oceania(SectionId.Currencies, "Oceania (C)"), C_Unknown(SectionId.Currencies, "Unknown Region (C)"), 220 // BCP47 221 u_Extension(SectionId.BCP47), t_Extension(SectionId.BCP47), 222 // Supplemental 223 Alias(SectionId.Supplemental), IdValidity(SectionId.Supplemental), Locale(SectionId.Supplemental), RegionMapping(SectionId.Supplemental), WZoneMapping( 224 SectionId.Supplemental), Transform(SectionId.Supplemental), UnitPreferences(SectionId.Supplemental), Likely(SectionId.Supplemental), LanguageMatch( 225 SectionId.Supplemental), TerritoryInfo(SectionId.Supplemental), LanguageInfo(SectionId.Supplemental), LanguageGroup( 226 SectionId.Supplemental), Fallback(SectionId.Supplemental), Gender(SectionId.Supplemental), 
Metazone(SectionId.Supplemental), NumberSystem( 227 SectionId.Supplemental), Plural(SectionId.Supplemental), PluralRange(SectionId.Supplemental), Containment( 228 SectionId.Supplemental), Currency(SectionId.Supplemental), Calendar(SectionId.Supplemental), WeekData( 229 SectionId.Supplemental), Measurement(SectionId.Supplemental), Language(SectionId.Supplemental), RBNF( 230 SectionId.Supplemental), Segmentation(SectionId.Supplemental), DayPeriod(SectionId.Supplemental), 231 232 Category(SectionId.Characters), 233 // [Smileys, People, Animals & Nature, Food & Drink, Travel & Places, Activities, Objects, Symbols, Flags] 234 Smileys(SectionId.Characters), People(SectionId.Characters), Animals_Nature(SectionId.Characters, "Animals & Nature"), Food_Drink(SectionId.Characters, 235 "Food & Drink"), Travel_Places(SectionId.Characters, "Travel & Places"), Activities(SectionId.Characters), Objects( 236 SectionId.Characters), Symbols2(SectionId.Characters), Flags(SectionId.Characters), Component(SectionId.Characters), 237 238 Typography(SectionId.Characters), 239 ; 240 241 private final SectionId sectionId; 242 243 private PageId(SectionId sectionId, String... 
alternateNames) { 244 this.sectionId = sectionId; 245 SectionIdToPageIds.put(sectionId, this); 246 PageIdNames.add(this, alternateNames); 247 } 248 249 /** 250 * Construct a pageId given a string 251 * 252 * @param name 253 * @return 254 */ 255 public static PageId forString(String name) { 256 try { 257 return PageIdNames.forString(name); 258 } catch (Exception e) { 259 throw new ICUException("No PageId for " + name, e); 260 } 261 } 262 263 /** 264 * Returns the page id 265 * 266 * @return a page ID, such as 'Languages' 267 */ 268 public String toString() { 269 return PageIdNames.toString(this); 270 } 271 272 /** 273 * Get the containing section id, such as 'Code Lists' 274 * 275 * @return the containing section ID 276 */ 277 public SectionId getSectionId() { 278 return sectionId; 279 } 280 } 281 282 private final SectionId sectionId; 283 private final PageId pageId; 284 private final String header; 285 private final String code; 286 private final String originalPath; 287 private final SurveyToolStatus status; 288 289 // Used for ordering 290 private final int headerOrder; 291 private final int codeOrder; 292 private final SubstringOrder codeSuborder; 293 294 static final Pattern SEMI = PatternCache.get("\\s*;\\s*"); 295 static final Matcher ALT_MATCHER = PatternCache.get( 296 "\\[@alt=\"([^\"]*+)\"]") 297 .matcher(""); 298 299 static final Collator alphabetic = CLDRConfig.getInstance().getCollatorRoot(); 300 301 // static final RuleBasedCollator alphabetic = (RuleBasedCollator) Collator 302 // .getInstance(ULocale.ENGLISH); 303 // static { 304 // alphabetic.setNumericCollation(true); 305 // alphabetic.freeze(); 306 // } 307 308 static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance(); 309 static final Map<String, String> metazoneToContinent = supplementalDataInfo 310 .getMetazoneToContinentMap(); 311 static final StandardCodes standardCode = StandardCodes.make(); 312 static final Map<String, String> metazoneToPageTerritory = new 
HashMap<String, String>(); 313 static { 314 Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone(); 315 for (Entry<String, Map<String, String>> metazoneEntry : metazoneToRegionToZone.entrySet()) { 316 String metazone = metazoneEntry.getKey(); 317 String worldZone = metazoneEntry.getValue().get("001"); 318 String territory = Containment.getRegionFromZone(worldZone); 319 if (territory == null) { 320 territory = "ZZ"; 321 } 322 // Russia, Antarctica => territory 323 // in Australasia, Asia, S. America => subcontinent 324 // in N. America => N. America (grouping of 3 subcontinents) 325 // in everything else => continent 326 if (territory.equals("RU") || territory.equals("AQ")) { 327 metazoneToPageTerritory.put(metazone, territory); 328 } else { 329 String continent = Containment.getContinent(territory); 330 String subcontinent = Containment.getSubcontinent(territory); 331 if (continent.equals("142")) { // Asia 332 metazoneToPageTerritory.put(metazone, subcontinent); 333 } else if (continent.equals("019")) { // Americas 334 metazoneToPageTerritory.put(metazone, subcontinent.equals("005") ? 
subcontinent : "003"); 335 } else if (subcontinent.equals("053")) { // Australasia 336 metazoneToPageTerritory.put(metazone, subcontinent); 337 } else { 338 metazoneToPageTerritory.put(metazone, continent); 339 } 340 } 341 } 342 } 343 344 /** 345 * @param section 346 * @param sectionOrder 347 * @param page 348 * @param pageOrder 349 * @param header 350 * @param headerOrder 351 * @param code 352 * @param codeOrder 353 * @param suborder 354 * @param status 355 */ 356 private PathHeader(SectionId sectionId, PageId pageId, String header, 357 int headerOrder, String code, int codeOrder, SubstringOrder suborder, SurveyToolStatus status, 358 String originalPath) { 359 this.sectionId = sectionId; 360 this.pageId = pageId; 361 this.header = header; 362 this.headerOrder = headerOrder; 363 this.code = code; 364 this.codeOrder = codeOrder; 365 this.codeSuborder = suborder; 366 this.originalPath = originalPath; 367 this.status = status; 368 } 369 370 /** 371 * Return a factory for use in creating the headers. This should be cached. 372 * The calls are thread-safe. The englishFile sets a static for now; after 373 * the first time, null can be passed. 
374 * 375 * @param englishFile 376 */ 377 public static Factory getFactory(CLDRFile englishFile) { 378 if (factorySingleton == null) { 379 if (englishFile == null) { 380 throw new IllegalArgumentException("English CLDRFile must not be null"); 381 } 382 if (!englishFile.getLocaleID().equals(ULocale.ENGLISH.getBaseName())) { 383 throw new IllegalArgumentException("PathHeader's CLDRFile must be '" + 384 ULocale.ENGLISH.getBaseName() + "', but found '" + englishFile.getLocaleID() + "'"); 385 } 386 factorySingleton = new Factory(englishFile); 387 } 388 return factorySingleton; 389 } 390 391 /** 392 * @deprecated 393 */ 394 public String getSection() { 395 return sectionId.toString(); 396 } 397 398 public SectionId getSectionId() { 399 return sectionId; 400 } 401 402 /** 403 * @deprecated 404 */ 405 public String getPage() { 406 return pageId.toString(); 407 } 408 409 public PageId getPageId() { 410 return pageId; 411 } 412 413 public String getHeader() { 414 return header == null ? "" : header; 415 } 416 417 public String getCode() { 418 return code; 419 } 420 421 public String getHeaderCode() { 422 return getHeader() + ": " + getCode(); 423 } 424 425 public String getOriginalPath() { 426 return originalPath; 427 } 428 429 public SurveyToolStatus getSurveyToolStatus() { 430 return status; 431 } 432 433 @Override 434 public String toString() { 435 return sectionId 436 + "\t" + pageId 437 + "\t" + header // + "\t" + headerOrder 438 + "\t" + code // + "\t" + codeOrder 439 ; 440 } 441 442 @Override 443 public int compareTo(PathHeader other) { 444 // Within each section, order alphabetically if the integer orders are 445 // not different. 
446 try { 447 int result; 448 if (0 != (result = sectionId.compareTo(other.sectionId))) { 449 return result; 450 } 451 if (0 != (result = pageId.compareTo(other.pageId))) { 452 return result; 453 } 454 if (0 != (result = headerOrder - other.headerOrder)) { 455 return result; 456 } 457 if (0 != (result = alphabeticCompare(header, other.header))) { 458 return result; 459 } 460 if (0 != (result = codeOrder - other.codeOrder)) { 461 return result; 462 } 463 if (codeSuborder != null) { // do all three cases, for transitivity 464 if (other.codeSuborder != null) { 465 if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) { 466 return result; 467 } 468 } else { 469 return 1; // if codeSuborder != null (and other.codeSuborder 470 // == null), it is greater 471 } 472 } else if (other.codeSuborder != null) { 473 return -1; // if codeSuborder == null (and other.codeSuborder != 474 // null), it is greater 475 } 476 if (0 != (result = alphabeticCompare(code, other.code))) { 477 return result; 478 } 479 if (!SKIP_ORIGINAL_PATH && 0 != (result = alphabeticCompare(originalPath, other.originalPath))) { 480 return result; 481 } 482 return 0; 483 } catch (RuntimeException e) { 484 throw new IllegalArgumentException("Internal problem comparing " + this + " and " + other, e); 485 } 486 } 487 488 public int compareHeader(PathHeader other) { 489 int result; 490 if (0 != (result = headerOrder - other.headerOrder)) { 491 return result; 492 } 493 if (0 != (result = alphabeticCompare(header, other.header))) { 494 return result; 495 } 496 return result; 497 } 498 499 public int compareCode(PathHeader other) { 500 int result; 501 if (0 != (result = codeOrder - other.codeOrder)) { 502 return result; 503 } 504 if (codeSuborder != null) { // do all three cases, for transitivity 505 if (other.codeSuborder != null) { 506 if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) { 507 return result; 508 } 509 } else { 510 return 1; // if codeSuborder != null (and other.codeSuborder 
511 // == null), it is greater 512 } 513 } else if (other.codeSuborder != null) { 514 return -1; // if codeSuborder == null (and other.codeSuborder != 515 // null), it is greater 516 } 517 if (0 != (result = alphabeticCompare(code, other.code))) { 518 return result; 519 } 520 return result; 521 } 522 523 @Override 524 public boolean equals(Object obj) { 525 PathHeader other; 526 try { 527 other = (PathHeader) obj; 528 } catch (Exception e) { 529 return false; 530 } 531 return sectionId == other.sectionId && pageId == other.pageId 532 && header.equals(other.header) && code.equals(other.code); 533 } 534 535 @Override 536 public int hashCode() { 537 return sectionId.hashCode() ^ pageId.hashCode() ^ header.hashCode() ^ code.hashCode(); 538 } 539 540 public static class Factory implements Transform<String, PathHeader> { 541 static final RegexLookup<RawData> lookup = RegexLookup 542 .of(new PathHeaderTransform()) 543 .setPatternTransform( 544 RegexLookup.RegexFinderTransformPath) 545 .loadFromFile( 546 PathHeader.class, 547 "data/PathHeader.txt"); 548 // synchronized with lookup 549 static final Output<String[]> args = new Output<String[]>(); 550 // synchronized with lookup 551 static final Counter<RawData> counter = new Counter<RawData>(); 552 // synchronized with lookup 553 static final Map<RawData, String> samples = new HashMap<RawData, String>(); 554 // synchronized with lookup 555 static int order; 556 static SubstringOrder suborder; 557 558 static final Map<String, PathHeader> cache = new HashMap<String, PathHeader>(); 559 // synchronized with cache 560 static final Map<SectionId, Map<PageId, SectionPage>> sectionToPageToSectionPage = new EnumMap<SectionId, Map<PageId, SectionPage>>( 561 SectionId.class); 562 static final Relation<SectionPage, String> sectionPageToPaths = Relation 563 .of(new TreeMap<SectionPage, Set<String>>(), 564 HashSet.class); 565 private static CLDRFile englishFile; 566 private Set<String> matchersFound = new HashSet<String>(); 567 568 /** 
569 * Create a factory for creating PathHeaders. 570 * 571 * @param englishFile 572 * - only sets the file (statically!) if not already set. 573 */ 574 private Factory(CLDRFile englishFile) { 575 setEnglishCLDRFileIfNotSet(englishFile); // temporary 576 } 577 578 /** 579 * Returns true if we set it, false if set before. 580 * 581 * @param englishFile2 582 * @return 583 */ 584 private static boolean setEnglishCLDRFileIfNotSet(CLDRFile englishFile2) { 585 synchronized (Factory.class) { 586 if (englishFile != null) { 587 return false; 588 } 589 englishFile = englishFile2; 590 return true; 591 } 592 } 593 594 /** 595 * Use only when trying to find unmatched patterns 596 */ 597 public void clearCache() { 598 synchronized (cache) { 599 cache.clear(); 600 } 601 } 602 603 /** 604 * Return the PathHeader for a given path. Thread-safe. 605 */ 606 public PathHeader fromPath(String path) { 607 return fromPath(path, null); 608 } 609 610 /** 611 * Return the PathHeader for a given path. Thread-safe. 612 */ 613 public PathHeader transform(String path) { 614 return fromPath(path, null); 615 } 616 617 /** 618 * Return the PathHeader for a given path. Thread-safe. 619 * @param failures a list of failures to add to. 
620 */ 621 public PathHeader fromPath(String path, List<String> failures) { 622 if (path == null) { 623 throw new NullPointerException("Path cannot be null"); 624 } 625 synchronized (cache) { 626 PathHeader old = cache.get(path); 627 if (old != null) { 628 return old; 629 } 630 } 631 synchronized (lookup) { 632 String cleanPath = path; 633 // special handling for alt 634 String alt = null; 635 int altPos = cleanPath.indexOf("[@alt="); 636 if (altPos >= 0 && !cleanPath.endsWith("/symbol[@alt=\"narrow\"]")) { 637 if (ALT_MATCHER.reset(cleanPath).find()) { 638 alt = ALT_MATCHER.group(1); 639 cleanPath = cleanPath.substring(0, ALT_MATCHER.start()) 640 + cleanPath.substring(ALT_MATCHER.end()); 641 int pos = alt.indexOf("proposed"); 642 if (pos >= 0 && !path.startsWith("//ldml/collations")) { 643 alt = pos == 0 ? null : alt.substring(0, pos - 1); 644 // drop "proposed", 645 // change "xxx-proposed" to xxx. 646 } 647 } else { 648 throw new IllegalArgumentException(); 649 } 650 } 651 Output<Finder> matcherFound = new Output<Finder>(); 652 RawData data = lookup.get(cleanPath, null, args, matcherFound, failures); 653 if (data == null) { 654 return null; 655 } 656 matchersFound.add(matcherFound.value.toString()); 657 counter.add(data, 1); 658 if (!samples.containsKey(data)) { 659 samples.put(data, cleanPath); 660 } 661 try { 662 PathHeader result = new PathHeader( 663 SectionId.forString(fix(data.section, 0)), 664 PageId.forString(fix(data.page, 0)), 665 fix(data.header, data.headerOrder), 666 order, // only valid after call to fix. TODO, make 667 // this cleaner 668 fix(data.code + (alt == null ? 
"" : ("-" + alt)), data.codeOrder), 669 order, // only valid after call to fix 670 suborder, 671 data.status, 672 path); 673 synchronized (cache) { 674 PathHeader old = cache.get(path); 675 if (old == null) { 676 cache.put(path, result); 677 } else { 678 result = old; 679 } 680 Map<PageId, SectionPage> pageToPathHeaders = sectionToPageToSectionPage 681 .get(result.sectionId); 682 if (pageToPathHeaders == null) { 683 sectionToPageToSectionPage.put(result.sectionId, pageToPathHeaders = new EnumMap<PageId, SectionPage>(PageId.class)); 684 } 685 SectionPage sectionPage = pageToPathHeaders.get(result.pageId); 686 if (sectionPage == null) { 687 sectionPage = new SectionPage(result.sectionId, result.pageId); 688 pageToPathHeaders.put(result.pageId, sectionPage); 689 } 690 sectionPageToPaths.put(sectionPage, path); 691 } 692 return result; 693 } catch (Exception e) { 694 throw new IllegalArgumentException( 695 "Probably mismatch in Page/Section enum, or too few capturing groups in regex for " + cleanPath, 696 e); 697 } 698 } 699 } 700 701 private static class SectionPage implements Comparable<SectionPage> { 702 private final SectionId sectionId; 703 private final PageId pageId; 704 SectionPage(SectionId sectionId, PageId pageId)705 public SectionPage(SectionId sectionId, PageId pageId) { 706 this.sectionId = sectionId; 707 this.pageId = pageId; 708 } 709 710 @Override compareTo(SectionPage other)711 public int compareTo(SectionPage other) { 712 // Within each section, order alphabetically if the integer 713 // orders are 714 // not different. 
715 int result; 716 if (0 != (result = sectionId.compareTo(other.sectionId))) { 717 return result; 718 } 719 if (0 != (result = pageId.compareTo(other.pageId))) { 720 return result; 721 } 722 return 0; 723 } 724 725 @Override equals(Object obj)726 public boolean equals(Object obj) { 727 PathHeader other; 728 try { 729 other = (PathHeader) obj; 730 } catch (Exception e) { 731 return false; 732 } 733 return sectionId == other.sectionId && pageId == other.pageId; 734 } 735 736 @Override hashCode()737 public int hashCode() { 738 return sectionId.hashCode() ^ pageId.hashCode(); 739 } 740 } 741 742 /** 743 * Returns a set of paths currently associated with the given section 744 * and page. 745 * <p> 746 * <b>Warning:</b> 747 * <ol> 748 * <li>The set may not be complete for a cldrFile unless all of paths in 749 * the file have had fromPath called. And this includes getExtraPaths(). 750 * </li> 751 * <li>The set may include paths that have no value in the current 752 * cldrFile.</li> 753 * <li>The set may be empty, if the section/page aren't valid.</li> 754 * </ol> 755 * Thread-safe. 756 * 757 * @target a collection where the paths are to be returned. 758 */ getCachedPaths(SectionId sectionId, PageId page)759 public static Set<String> getCachedPaths(SectionId sectionId, PageId page) { 760 Set<String> target = new HashSet<String>(); 761 synchronized (cache) { 762 Map<PageId, SectionPage> pageToSectionPage = sectionToPageToSectionPage 763 .get(sectionId); 764 if (pageToSectionPage == null) { 765 return target; 766 } 767 SectionPage sectionPage = pageToSectionPage.get(page); 768 if (sectionPage == null) { 769 return target; 770 } 771 Set<String> set = sectionPageToPaths.getAll(sectionPage); 772 target.addAll(set); 773 } 774 return target; 775 } 776 777 /** 778 * Return the Sections and Pages that are in defined, for display in 779 * menus. Both are ordered. 
780 */ getSectionIdsToPageIds()781 public static Relation<SectionId, PageId> getSectionIdsToPageIds() { 782 SectionIdToPageIds.freeze(); // just in case 783 return SectionIdToPageIds; 784 } 785 786 /** 787 * Return paths that have the designated section and page. 788 * 789 * @param sectionId 790 * @param pageId 791 * @param file 792 */ filterCldr(SectionId sectionId, PageId pageId, CLDRFile file)793 public Iterable<String> filterCldr(SectionId sectionId, PageId pageId, CLDRFile file) { 794 return new FilteredIterable(sectionId, pageId, file); 795 } 796 797 /** 798 * Return the names for Sections and Pages that are defined, for display 799 * in menus. Both are ordered. 800 * 801 * @deprecated Use getSectionIdsToPageIds 802 */ getSectionsToPages()803 public static LinkedHashMap<String, Set<String>> getSectionsToPages() { 804 LinkedHashMap<String, Set<String>> sectionsToPages = new LinkedHashMap<String, Set<String>>(); 805 for (PageId pageId : PageId.values()) { 806 String sectionId2 = pageId.getSectionId().toString(); 807 Set<String> pages = sectionsToPages.get(sectionId2); 808 if (pages == null) { 809 sectionsToPages.put(sectionId2, pages = new LinkedHashSet<String>()); 810 } 811 pages.add(pageId.toString()); 812 } 813 return sectionsToPages; 814 } 815 816 /** 817 * @deprecated, use the filterCldr with the section/page ids. 
*/
        public Iterable<String> filterCldr(String section, String page, CLDRFile file) {
            return new FilteredIterable(section, page, file);
        }

        /**
         * Iterates over the full path set of a CLDRFile, yielding only those paths whose
         * PathHeader (as computed by fromPath) has the requested section and page.
         */
        private class FilteredIterable implements Iterable<String>, SimpleIterator<String> {
            private final SectionId sectionId;
            private final PageId pageId;
            private final Iterator<String> fileIterator;

            FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file) {
                this.sectionId = sectionId;
                this.pageId = pageId;
                this.fileIterator = file.fullIterable().iterator();
            }

            public FilteredIterable(String section, String page, CLDRFile file) {
                this(SectionId.forString(section), PageId.forString(page), file);
            }

            @Override
            public Iterator<String> iterator() {
                return With.toIterator(this);
            }

            /**
             * @return the next matching path, or null once the underlying iterator is exhausted
             */
            @Override
            public String next() {
                while (fileIterator.hasNext()) {
                    String path = fileIterator.next();
                    PathHeader pathHeader = fromPath(path);
                    if (sectionId == pathHeader.sectionId && pageId == pathHeader.pageId) {
                        return path;
                    }
                }
                return null;
            }
        }

        /**
         * Assigns each distinct item an order number equal to the position at which it was
         * first seen. Whenever a new item is registered, the chained lower-level ordering
         * (toClear, and recursively its own toClear) is reset so that it starts again from zero.
         */
        private static class ChronologicalOrder {
            private final Map<String, Integer> map = new HashMap<String, Integer>();
            private String item;
            private int order;
            private final ChronologicalOrder toClear;

            ChronologicalOrder(ChronologicalOrder toClear) {
                this.toClear = toClear;
            }

            int getOrder() {
                return order;
            }

            /**
             * Registers an item and updates the current order.
             *
             * @param itemToOrder the item; a leading "*" is stripped and leaves the current order untouched
             * @return the item (without any leading "*")
             */
            public String set(String itemToOrder) {
                if (itemToOrder.startsWith("*")) {
                    item = itemToOrder.substring(1);
                    return item; // keep old order
                }
                item = itemToOrder;
                Integer old = map.get(item);
                if (old != null) {
                    order = old.intValue();
                } else {
                    order = map.size();
                    map.put(item, order);
                    clearLower();
                }
                return item;
            }

            private void clearLower() {
                if (toClear != null) {
                    toClear.map.clear();
                    toClear.order = 0;
                    toClear.clearLower();
                }
            }
        }

        /**
         * One parsed data line: section, page, header, code and an optional survey tool status,
         * as produced by SEMI.split. Header and code also record their first-seen order; a new
         * header restarts the code ordering.
         */
        static class RawData {
            static ChronologicalOrder codeOrdering = new ChronologicalOrder(null);
            static ChronologicalOrder headerOrdering = new ChronologicalOrder(codeOrdering);

            /**
             * @param source the line to parse; must split into at least four fields, the
             *            fifth (status) defaults to READ_WRITE when absent
             */
            public RawData(String source) {
                String[] split = SEMI.split(source);
                section = split[0];
                // HACK
                if (section.equals("Timezones") && split[1].equals("Indian")) {
                    page = "Indian2";
                } else {
                    page = split[1];
                }

                header = headerOrdering.set(split[2]);
                headerOrder = headerOrdering.getOrder();

                code = codeOrdering.set(split[3]);
                codeOrder = codeOrdering.getOrder();

                status = split.length < 5 ? SurveyToolStatus.READ_WRITE : SurveyToolStatus.valueOf(split[4]);
            }

            public final String section;
            public final String page;
            public final String header;
            public final int headerOrder;
            public final String code;
            public final int codeOrder;
            public final SurveyToolStatus status;

            @Override
            public String toString() {
                return section + "\t"
                    + page + "\t"
                    + header + "\t" + headerOrder + "\t"
                    + code + "\t" + codeOrder + "\t"
                    + status;
            }
        }

        /** Adapter that turns a raw data line into a RawData. */
        static class PathHeaderTransform implements Transform<String, RawData> {
            @Override
            public RawData transform(String source) {
                return new RawData(source);
            }
        }

        /**
         * Internal data, for testing and debugging.
         *
         * @deprecated
         */
        @Deprecated
        public class CounterData extends Row.R4<String, RawData, String, String> {
            /**
             * @param a the regex (Finder) text
             * @param b the data the regex maps to
             * @param c a sample path, or null; null is reported as "no sample"
             */
            public CounterData(String a, RawData b, String c) {
                super(a, b, c == null ? "no sample" : c, c == null ? "no sample" : fromPath(c)
                    .toString());
            }
        }

        /**
         * Get the internal data, for testing and debugging.
         *
         * @deprecated
         */
        @Deprecated
        public Counter<CounterData> getInternalCounter() {
            synchronized (lookup) {
                Counter<CounterData> result = new Counter<CounterData>();
                for (Map.Entry<Finder, RawData> entry : lookup) {
                    Finder finder = entry.getKey();
                    RawData data = entry.getValue();
                    long count = counter.get(data);
                    result.add(new CounterData(finder.toString(), data, samples.get(data)), count);
                }
                return result;
            }
        }

        /** Functions that can be referenced as &name(arg); looked up by name in fix(). */
        static Map<String, Transform<String, String>> functionMap = new HashMap<String, Transform<String, String>>();
        static String[] months = { "Jan", "Feb", "Mar",
            "Apr", "May", "Jun",
            "Jul", "Aug", "Sep",
            "Oct", "Nov", "Dec",
            "Und" };
        static List<String> days = Arrays.asList("sun", "mon",
            "tue", "wed", "thu",
            "fri", "sat");
        static List<String> unitOrder = DtdData.unitOrder.getOrder();
        static final MapComparator<String> dayPeriods = new MapComparator<String>().add(
            "am", "pm", "midnight", "noon",
            "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2").freeze();
        // static Map<String, String> likelySubtags =
        // supplementalDataInfo.getLikelySubtags();
        static LikelySubtags likelySubtags = new LikelySubtags();
        static HyphenSplitter hyphenSplitter = new HyphenSplitter();
        static Transform<String, String> catFromTerritory;
        static Transform<String, String> catFromTimezone;
        static {
            // Put any new functions used in PathHeader.txt in here.
            // To change the order of items within a section or heading, set
            // order/suborder to be the relative position of the current item.
997 functionMap.put("month", new Transform<String, String>() { 998 public String transform(String source) { 999 int m = Integer.parseInt(source); 1000 order = m; 1001 return months[m - 1]; 1002 } 1003 }); 1004 functionMap.put("count", new Transform<String, String>() { 1005 public String transform(String source) { 1006 suborder = new SubstringOrder(source); 1007 return source; 1008 } 1009 }); 1010 functionMap.put("count2", new Transform<String, String>() { 1011 public String transform(String source) { 1012 int pos = source.indexOf('-'); 1013 source = pos + source.substring(pos); 1014 suborder = new SubstringOrder(source); // make 10000-... 1015 // into 5- 1016 return source; 1017 } 1018 }); 1019 functionMap.put("currencySymbol", new Transform<String, String>() { 1020 public String transform(String source) { 1021 order = 901; 1022 if (source.endsWith("narrow")) { 1023 order = 902; 1024 } 1025 if (source.endsWith("variant")) { 1026 order = 903; 1027 } 1028 return source; 1029 } 1030 }); 1031 functionMap.put("unitCount", new Transform<String, String>() { 1032 public String transform(String source) { 1033 String[] unitLengths = { "long", "short", "narrow" }; 1034 int pos = 9; 1035 for (int i = 0; i < unitLengths.length; i++) { 1036 if (source.startsWith(unitLengths[i])) { 1037 pos = i; 1038 continue; 1039 } 1040 } 1041 order = pos; 1042 suborder = new SubstringOrder(pos + "-" + source); // 1043 return source; 1044 } 1045 }); 1046 functionMap.put("day", new Transform<String, String>() { 1047 public String transform(String source) { 1048 int m = days.indexOf(source); 1049 order = m; 1050 return source; 1051 } 1052 }); 1053 functionMap.put("dayPeriod", new Transform<String, String>() { 1054 public String transform(String source) { 1055 try { 1056 order = dayPeriods.getNumericOrder(source); 1057 } catch (Exception e) { 1058 // if an old item is tried, like "evening", this will fail. 1059 // so that old data still works, hack this. 
1060 order = Math.abs(source.hashCode() << 16); 1061 } 1062 return source; 1063 } 1064 }); 1065 functionMap.put("calendar", new Transform<String, String>() { 1066 Map<String, String> fixNames = Builder.with(new HashMap<String, String>()) 1067 .put("islamicc", "Islamic Civil") 1068 .put("roc", "Minguo") 1069 .put("Ethioaa", "Ethiopic Amete Alem") 1070 .put("Gregory", "Gregorian") 1071 .put("iso8601", "ISO 8601") 1072 .freeze(); 1073 1074 public String transform(String source) { 1075 String result = fixNames.get(source); 1076 return result != null ? result : UCharacter.toTitleCase(source, null); 1077 } 1078 }); 1079 1080 functionMap.put("calField", new Transform<String, String>() { 1081 public String transform(String source) { 1082 String[] fields = source.split(":", 3); 1083 order = 0; 1084 final List<String> widthValues = Arrays.asList( 1085 "wide", "abbreviated", "short", "narrow"); 1086 final List<String> calendarFieldValues = Arrays.asList( 1087 "Eras", 1088 "Quarters", 1089 "Months", 1090 "Days", 1091 "DayPeriods", 1092 "Formats"); 1093 final List<String> calendarFormatTypes = Arrays.asList( 1094 "Standard", 1095 "Flexible", 1096 "Intervals"); 1097 final List<String> calendarContextTypes = Arrays.asList( 1098 "none", 1099 "format", 1100 "stand-alone"); 1101 final List<String> calendarFormatSubtypes = Arrays.asList( 1102 "date", 1103 "time", 1104 "time12", 1105 "time24", 1106 "dateTime", 1107 "fallback"); 1108 1109 Map<String, String> fixNames = Builder.with(new HashMap<String, String>()) 1110 .put("DayPeriods", "Day Periods") 1111 .put("format", "Formatting") 1112 .put("stand-alone", "Standalone") 1113 .put("none", "") 1114 .put("date", "Date Formats") 1115 .put("time", "Time Formats") 1116 .put("time12", "12 Hour Time Formats") 1117 .put("time24", "24 Hour Time Formats") 1118 .put("dateTime", "Date & Time Combination Formats") 1119 .freeze(); 1120 1121 if (calendarFieldValues.contains(fields[0])) { 1122 order = calendarFieldValues.indexOf(fields[0]) * 100; 
1123 } else { 1124 order = calendarFieldValues.size() * 100; 1125 } 1126 1127 if (fields[0].equals("Formats")) { 1128 if (calendarFormatTypes.contains(fields[1])) { 1129 order += calendarFormatTypes.indexOf(fields[1]) * 10; 1130 } else { 1131 order += calendarFormatTypes.size() * 10; 1132 } 1133 if (calendarFormatSubtypes.contains(fields[2])) { 1134 order += calendarFormatSubtypes.indexOf(fields[2]); 1135 } else { 1136 order += calendarFormatSubtypes.size(); 1137 } 1138 } else { 1139 if (widthValues.contains(fields[1])) { 1140 order += widthValues.indexOf(fields[1]) * 10; 1141 } else { 1142 order += widthValues.size() * 10; 1143 } 1144 if (calendarContextTypes.contains(fields[2])) { 1145 order += calendarContextTypes.indexOf(fields[2]); 1146 } else { 1147 order += calendarContextTypes.size(); 1148 } 1149 } 1150 1151 String[] fixedFields = new String[fields.length]; 1152 for (int i = 0; i < fields.length; i++) { 1153 String s = fixNames.get(fields[i]); 1154 fixedFields[i] = s != null ? s : fields[i]; 1155 } 1156 1157 return fixedFields[0] + 1158 " - " + fixedFields[1] + 1159 (fixedFields[2].length() > 0 ? 
" - " + fixedFields[2] : ""); 1160 } 1161 }); 1162 1163 functionMap.put("titlecase", new Transform<String, String>() { 1164 public String transform(String source) { 1165 return UCharacter.toTitleCase(source, null); 1166 } 1167 }); 1168 functionMap.put("categoryFromScript", new Transform<String, String>() { 1169 public String transform(String source) { 1170 String script = hyphenSplitter.split(source); 1171 Info info = ScriptMetadata.getInfo(script); 1172 if (info == null) { 1173 info = ScriptMetadata.getInfo("Zzzz"); 1174 } 1175 order = 100 - info.idUsage.ordinal(); 1176 return info.idUsage.name; 1177 } 1178 }); 1179 functionMap.put("categoryFromKey", new Transform<String, String>() { 1180 Map<String, String> fixNames = Builder.with(new HashMap<String, String>()) 1181 .put("lb", "Line Break") 1182 .put("hc", "Hour Cycle") 1183 .put("ms", "Measurement System") 1184 .put("cf", "Currency Format") 1185 .freeze(); 1186 1187 public String transform(String source) { 1188 String fixedName = fixNames.get(source); 1189 return fixedName != null ? 
fixedName : source; 1190 } 1191 }); 1192 functionMap.put("languageSection", new Transform<String, String>() { 1193 char[] languageRangeStartPoints = { 'A', 'E', 'K', 'O', 'T' }; 1194 char[] languageRangeEndPoints = { 'D', 'J', 'N', 'S', 'Z' }; 1195 1196 public String transform(String source0) { 1197 char firstLetter = getEnglishFirstLetter(source0).charAt(0); 1198 for (int i = 0; i < languageRangeStartPoints.length; i++) { 1199 if (firstLetter >= languageRangeStartPoints[i] && firstLetter <= languageRangeEndPoints[i]) { 1200 return "Languages (" + Character.toUpperCase(languageRangeStartPoints[i]) + "-" + Character.toUpperCase(languageRangeEndPoints[i]) 1201 + ")"; 1202 } 1203 } 1204 return "Languages"; 1205 } 1206 }); 1207 functionMap.put("firstLetter", new Transform<String, String>() { 1208 public String transform(String source0) { 1209 return getEnglishFirstLetter(source0); 1210 } 1211 }); 1212 functionMap.put("languageSort", new Transform<String, String>() { 1213 public String transform(String source0) { 1214 String languageOnlyPart; 1215 int underscorePos = source0.indexOf("_"); 1216 if (underscorePos > 0) { 1217 languageOnlyPart = source0.substring(0, underscorePos); 1218 } else { 1219 languageOnlyPart = source0; 1220 } 1221 1222 return englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart) + " \u25BA " + source0; 1223 } 1224 }); 1225 functionMap.put("scriptFromLanguage", new Transform<String, String>() { 1226 public String transform(String source0) { 1227 String language = hyphenSplitter.split(source0); 1228 String script = likelySubtags.getLikelyScript(language); 1229 if (script == null) { 1230 script = likelySubtags.getLikelyScript(language); 1231 } 1232 String scriptName = englishFile.getName(CLDRFile.SCRIPT_NAME, script); 1233 return "Languages in " + (script.equals("Hans") || script.equals("Hant") ? "Han Script" 1234 : scriptName.endsWith(" Script") ? 
scriptName 1235 : scriptName + " Script"); 1236 } 1237 }); 1238 functionMap.put("categoryFromTerritory", 1239 catFromTerritory = new Transform<String, String>() { 1240 public String transform(String source) { 1241 String territory = getSubdivisionsTerritory(source, null); 1242 String container = Containment.getContainer(territory); 1243 order = Containment.getOrder(territory); 1244 return englishFile.getName(CLDRFile.TERRITORY_NAME, container); 1245 } 1246 }); 1247 functionMap.put("territorySection", new Transform<String, String>() { 1248 final Set<String> specialRegions = new HashSet<String>(Arrays.asList("EZ", "EU", "QO", "UN", "ZZ")); 1249 1250 public String transform(String source0) { 1251 // support subdivisions 1252 String theTerritory = getSubdivisionsTerritory(source0, null); 1253 try { 1254 if (specialRegions.contains(theTerritory) 1255 || theTerritory.charAt(0) < 'A' && Integer.valueOf(theTerritory) > 0) { 1256 return "Geographic Regions"; 1257 } 1258 } catch (NumberFormatException ex) { 1259 } 1260 String theContinent = Containment.getContinent(theTerritory); 1261 String theSubContinent; 1262 switch (theContinent) { // was Integer.valueOf 1263 case "019": // Americas - For the territorySection, we just group North America & South America 1264 final String subcontinent = Containment.getSubcontinent(theTerritory); 1265 theSubContinent = subcontinent.equals("005") ? 
"005" : "003"; // was Integer.valueOf(subcontinent) == 5 1266 return "Territories (" + englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent) + ")"; 1267 case "001": 1268 case "ZZ": 1269 return "Geographic Regions"; // not in containment 1270 default: 1271 return "Territories (" + englishFile.getName(CLDRFile.TERRITORY_NAME, theContinent) + ")"; 1272 } 1273 } 1274 }); 1275 functionMap.put("categoryFromTimezone", 1276 catFromTimezone = new Transform<String, String>() { 1277 public String transform(String source0) { 1278 String territory = Containment.getRegionFromZone(source0); 1279 if (territory == null) { 1280 territory = "ZZ"; 1281 } 1282 return catFromTerritory.transform(territory); 1283 } 1284 }); 1285 functionMap.put("timeZonePage", new Transform<String, String>() { 1286 Set<String> singlePageTerritories = new HashSet<String>(Arrays.asList("AQ", "RU", "ZZ")); 1287 1288 public String transform(String source0) { 1289 String theTerritory = Containment.getRegionFromZone(source0); 1290 if (theTerritory == null || theTerritory == "001") { 1291 theTerritory = "ZZ"; 1292 } 1293 if (singlePageTerritories.contains(theTerritory)) { 1294 return englishFile.getName(CLDRFile.TERRITORY_NAME, theTerritory); 1295 } 1296 String theContinent = Containment.getContinent(theTerritory); 1297 final String subcontinent = Containment.getSubcontinent(theTerritory); 1298 String theSubContinent; 1299 switch (Integer.valueOf(theContinent)) { 1300 case 9: // Oceania - For the timeZonePage, we group Australasia on one page, and the rest of Oceania on the other. 1301 try { 1302 theSubContinent = subcontinent.equals("053") ? "053" : "009"; // was Integer.valueOf(subcontinent) == 53 1303 } catch (NumberFormatException ex) { 1304 theSubContinent = "009"; 1305 } 1306 return englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent); 1307 case 19: // Americas - For the timeZonePage, we just group North America & South America 1308 theSubContinent = Integer.valueOf(subcontinent) == 5 ? 
"005" : "003"; 1309 return englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent); 1310 case 142: // Asia 1311 return englishFile.getName(CLDRFile.TERRITORY_NAME, subcontinent); 1312 default: 1313 return englishFile.getName(CLDRFile.TERRITORY_NAME, theContinent); 1314 } 1315 } 1316 }); 1317 1318 functionMap.put("timezoneSorting", new Transform<String, String>() { 1319 public String transform(String source) { 1320 final List<String> codeValues = Arrays.asList( 1321 "generic-long", 1322 "generic-short", 1323 "standard-long", 1324 "standard-short", 1325 "daylight-long", 1326 "daylight-short"); 1327 if (codeValues.contains(source)) { 1328 order = codeValues.indexOf(source); 1329 } else { 1330 order = codeValues.size(); 1331 } 1332 return source; 1333 } 1334 }); 1335 1336 functionMap.put("tzdpField", new Transform<String, String>() { 1337 public String transform(String source) { 1338 Map<String, String> fieldNames = Builder.with(new HashMap<String, String>()) 1339 .put("regionFormat", "Region Format - Generic") 1340 .put("regionFormat-standard", "Region Format - Standard") 1341 .put("regionFormat-daylight", "Region Format - Daylight") 1342 .put("gmtFormat", "GMT Format") 1343 .put("hourFormat", "GMT Hours/Minutes Format") 1344 .put("gmtZeroFormat", "GMT Zero Format") 1345 .put("fallbackFormat", "Location Fallback Format") 1346 .freeze(); 1347 final List<String> fieldOrder = Arrays.asList( 1348 "regionFormat", 1349 "regionFormat-standard", 1350 "regionFormat-daylight", 1351 "gmtFormat", 1352 "hourFormat", 1353 "gmtZeroFormat", 1354 "fallbackFormat"); 1355 1356 if (fieldOrder.contains(source)) { 1357 order = fieldOrder.indexOf(source); 1358 } else { 1359 order = fieldOrder.size(); 1360 } 1361 1362 String result = fieldNames.get(source); 1363 return result == null ? 
source : result; 1364 } 1365 }); 1366 functionMap.put("unit", new Transform<String, String>() { 1367 public String transform(String source) { 1368 int m = unitOrder.indexOf(source); 1369 order = m; 1370 return source.substring(source.indexOf('-') + 1); 1371 } 1372 }); 1373 1374 functionMap.put("numericSort", new Transform<String, String>() { 1375 // Probably only works well for small values, like -5 through +4. 1376 public String transform(String source) { 1377 Integer pos = Integer.valueOf(source) + 5; 1378 suborder = new SubstringOrder(pos.toString()); 1379 return source; 1380 } 1381 }); 1382 1383 functionMap.put("metazone", new Transform<String, String>() { 1384 1385 public String transform(String source) { 1386 if (PathHeader.UNIFORM_CONTINENTS) { 1387 String container = getMetazonePageTerritory(source); 1388 order = Containment.getOrder(container); 1389 return englishFile.getName(CLDRFile.TERRITORY_NAME, container); 1390 } else { 1391 String continent = metazoneToContinent.get(source); 1392 if (continent == null) { 1393 continent = "UnknownT"; 1394 } 1395 return continent; 1396 } 1397 } 1398 }); 1399 1400 Object[][] ctto = { 1401 { "BUK", "MM" }, 1402 { "CSD", "RS" }, 1403 { "CSK", "CZ" }, 1404 { "DDM", "DE" }, 1405 { "EUR", "ZZ" }, 1406 { "RHD", "ZW" }, 1407 { "SUR", "RU" }, 1408 { "TPE", "TL" }, 1409 { "XAG", "ZZ" }, 1410 { "XAU", "ZZ" }, 1411 { "XBA", "ZZ" }, 1412 { "XBB", "ZZ" }, 1413 { "XBC", "ZZ" }, 1414 { "XBD", "ZZ" }, 1415 { "XDR", "ZZ" }, 1416 { "XEU", "ZZ" }, 1417 { "XFO", "ZZ" }, 1418 { "XFU", "ZZ" }, 1419 { "XPD", "ZZ" }, 1420 { "XPT", "ZZ" }, 1421 { "XRE", "ZZ" }, 1422 { "XSU", "ZZ" }, 1423 { "XTS", "ZZ" }, 1424 { "XUA", "ZZ" }, 1425 { "XXX", "ZZ" }, 1426 { "YDD", "YE" }, 1427 { "YUD", "RS" }, 1428 { "YUM", "RS" }, 1429 { "YUN", "RS" }, 1430 { "YUR", "RS" }, 1431 { "ZRN", "CD" }, 1432 { "ZRZ", "CD" }, 1433 }; 1434 1435 Object[][] sctc = { 1436 { "Northern America", "North America (C)" }, 1437 { "Central America", "North America (C)" }, 1438 { 
"Caribbean", "North America (C)" }, 1439 { "South America", "South America (C)" }, 1440 { "Northern Africa", "Northern Africa" }, 1441 { "Western Africa", "Western Africa" }, 1442 { "Middle Africa", "Middle Africa" }, 1443 { "Eastern Africa", "Eastern Africa" }, 1444 { "Southern Africa", "Southern Africa" }, 1445 { "Europe", "Northern/Western Europe" }, 1446 { "Northern Europe", "Northern/Western Europe" }, 1447 { "Western Europe", "Northern/Western Europe" }, 1448 { "Eastern Europe", "Southern/Eastern Europe" }, 1449 { "Southern Europe", "Southern/Eastern Europe" }, 1450 { "Western Asia", "Western Asia (C)" }, 1451 { "Central Asia", "Central Asia (C)" }, 1452 { "Eastern Asia", "Eastern Asia (C)" }, 1453 { "Southern Asia", "Southern Asia (C)" }, 1454 { "Southeast Asia", "Southeast Asia (C)" }, 1455 { "Australasia", "Oceania (C)" }, 1456 { "Melanesia", "Oceania (C)" }, 1457 { "Micronesian Region", "Oceania (C)" }, // HACK 1458 { "Polynesia", "Oceania (C)" }, 1459 { "Unknown Region", "Unknown Region (C)" }, 1460 }; 1461 1462 final Map<String, String> currencyToTerritoryOverrides = CldrUtility.asMap(ctto); 1463 final Map<String, String> subContinentToContinent = CldrUtility.asMap(sctc); 1464 final Set<String> fundCurrencies = new HashSet<String>(Arrays.asList("CHE", "CHW", "CLF", "COU", "ECV", "MXV", "USN", "USS", "UYI", "XEU", "ZAL")); 1465 final Set<String> offshoreCurrencies = new HashSet<String>(Arrays.asList("CNH")); 1466 // TODO: Put this into supplementalDataInfo ? 
1467 1468 functionMap.put("categoryFromCurrency", new Transform<String, String>() { 1469 public String transform(String source0) { 1470 String tenderOrNot = ""; 1471 String territory = likelySubtags.getLikelyTerritoryFromCurrency(source0); 1472 if (territory == null) { 1473 String tag; 1474 if (fundCurrencies.contains(source0)) { 1475 tag = " (fund)"; 1476 } else if (offshoreCurrencies.contains(source0)) { 1477 tag = " (offshore)"; 1478 } else { 1479 tag = " (old)"; 1480 } 1481 tenderOrNot = ": " + source0 + tag; 1482 } 1483 if (currencyToTerritoryOverrides.keySet().contains(source0)) { 1484 territory = currencyToTerritoryOverrides.get(source0); 1485 } else if (territory == null) { 1486 territory = source0.substring(0, 2); 1487 } 1488 1489 if (territory.equals("ZZ")) { 1490 order = 999; 1491 return englishFile.getName(CLDRFile.TERRITORY_NAME, territory) + ": " + source0; 1492 } else { 1493 return catFromTerritory.transform(territory) + ": " 1494 + englishFile.getName(CLDRFile.TERRITORY_NAME, territory) 1495 + tenderOrNot; 1496 } 1497 } 1498 }); 1499 functionMap.put("continentFromCurrency", new Transform<String, String>() { 1500 public String transform(String source0) { 1501 String subContinent; 1502 String territory = likelySubtags.getLikelyTerritoryFromCurrency(source0); 1503 if (currencyToTerritoryOverrides.keySet().contains(source0)) { 1504 territory = currencyToTerritoryOverrides.get(source0); 1505 } else if (territory == null) { 1506 territory = source0.substring(0, 2); 1507 } 1508 1509 if (territory.equals("ZZ")) { 1510 order = 999; 1511 subContinent = englishFile.getName(CLDRFile.TERRITORY_NAME, territory); 1512 } else { 1513 subContinent = catFromTerritory.transform(territory); 1514 } 1515 1516 String result = subContinentToContinent.get(subContinent); //the continent is the last word in the territory representation 1517 return result; 1518 } 1519 }); 1520 functionMap.put("numberingSystem", new Transform<String, String>() { 1521 public String 
transform(String source0) { 1522 if ("latn".equals(source0)) { 1523 return ""; 1524 } 1525 String displayName = englishFile.getStringValue("//ldml/localeDisplayNames/types/type[@key=\"numbers\"][@type=\"" 1526 + source0 + "\"]"); 1527 return "using " + (displayName == null ? source0 : displayName + " (" + source0 + ")"); 1528 } 1529 }); 1530 1531 functionMap.put("datefield", new Transform<String, String>() { 1532 private final String[] datefield = { 1533 "era", "era-short", "era-narrow", 1534 "century", "century-short", "century-narrow", 1535 "year", "year-short", "year-narrow", 1536 "quarter", "quarter-short", "quarter-narrow", 1537 "month", "month-short", "month-narrow", 1538 "week", "week-short", "week-narrow", 1539 "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow", 1540 "day", "day-short", "day-narrow", 1541 "dayOfYear", "dayOfYear-short", "dayOfYear-narrow", 1542 "weekday", "weekday-short", "weekday-narrow", 1543 "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow", 1544 "dayperiod", "dayperiod-short", "dayperiod-narrow", 1545 "zone", "zone-short", "zone-narrow", 1546 "hour", "hour-short", "hour-narrow", 1547 "minute", "minute-short", "minute-narrow", 1548 "second", "second-short", "second-narrow", 1549 "millisecond", "millisecond-short", "millisecond-narrow", 1550 "microsecond", "microsecond-short", "microsecond-narrow", 1551 "nanosecond", "nanosecond-short", "nanosecond-narrow", 1552 1553 }; 1554 1555 public String transform(String source) { 1556 order = getIndex(source, datefield); 1557 return source; 1558 } 1559 }); 1560 // //ldml/dates/fields/field[@type="%A"]/relative[@type="%A"] 1561 functionMap.put("relativeDate", new Transform<String, String>() { 1562 private final String[] relativeDateField = { 1563 "year", "year-short", "year-narrow", 1564 "quarter", "quarter-short", "quarter-narrow", 1565 "month", "month-short", "month-narrow", 1566 "week", "week-short", "week-narrow", 1567 "day", "day-short", "day-narrow", 1568 "hour", 
"hour-short", "hour-narrow", 1569 "minute", "minute-short", "minute-narrow", 1570 "second", "second-short", "second-narrow", 1571 "sun", "sun-short", "sun-narrow", 1572 "mon", "mon-short", "mon-narrow", 1573 "tue", "tue-short", "tue-narrow", 1574 "wed", "wed-short", "wed-narrow", 1575 "thu", "thu-short", "thu-narrow", 1576 "fri", "fri-short", "fri-narrow", 1577 "sat", "sat-short", "sat-narrow", 1578 }; 1579 private final String[] longNames = { 1580 "Year", "Year Short", "Year Narrow", 1581 "Quarter", "Quarter Short", "Quarter Narrow", 1582 "Month", "Month Short", "Month Narrow", 1583 "Week", "Week Short", "Week Narrow", 1584 "Day", "Day Short", "Day Narrow", 1585 "Hour", "Hour Short", "Hour Narrow", 1586 "Minute", "Minute Short", "Minute Narrow", 1587 "Second", "Second Short", "Second Narrow", 1588 "Sunday", "Sunday Short", "Sunday Narrow", 1589 "Monday", "Monday Short", "Monday Narrow", 1590 "Tuesday", "Tuesday Short", "Tuesday Narrow", 1591 "Wednesday", "Wednesday Short", "Wednesday Narrow", 1592 "Thursday", "Thursday Short", "Thursday Narrow", 1593 "Friday", "Friday Short", "Friday Narrow", 1594 "Saturday", "Saturday Short", "Saturday Narrow", 1595 }; 1596 1597 public String transform(String source) { 1598 order = getIndex(source, relativeDateField) + 100; 1599 return "Relative " + longNames[getIndex(source, relativeDateField)]; 1600 } 1601 }); 1602 // Sorts numberSystem items (except for decimal formats). 1603 functionMap.put("number", new Transform<String, String>() { 1604 private final String[] symbols = { "decimal", "group", 1605 "plusSign", "minusSign", "percentSign", "perMille", 1606 "exponential", "superscriptingExponent", 1607 "infinity", "nan", "list", "currencies" 1608 }; 1609 1610 public String transform(String source) { 1611 String[] parts = source.split("-"); 1612 order = getIndex(parts[0], symbols); 1613 // e.g. 
"currencies-one" 1614 if (parts.length > 1) { 1615 suborder = new SubstringOrder(parts[1]); 1616 } 1617 return source; 1618 } 1619 }); 1620 functionMap.put("numberFormat", new Transform<String, String>() { 1621 public String transform(String source) { 1622 final List<String> fieldOrder = Arrays.asList( 1623 "standard-decimal", 1624 "standard-currency", 1625 "standard-currency-accounting", 1626 "standard-percent", 1627 "standard-scientific"); 1628 1629 if (fieldOrder.contains(source)) { 1630 order = fieldOrder.indexOf(source); 1631 } else { 1632 order = fieldOrder.size(); 1633 } 1634 1635 return source; 1636 } 1637 }); 1638 1639 functionMap.put("localePattern", new Transform<String, String>() { 1640 public String transform(String source) { 1641 // Put localeKeyTypePattern behind localePattern and 1642 // localeSeparator. 1643 if (source.equals("localeKeyTypePattern")) { 1644 order = 10; 1645 } 1646 return source; 1647 } 1648 }); 1649 functionMap.put("listOrder", new Transform<String, String>() { 1650 private String[] listParts = { "2", "start", "middle", "end" }; 1651 1652 @Override 1653 public String transform(String source) { 1654 order = getIndex(source, listParts); 1655 return source; 1656 } 1657 }); 1658 functionMap.put("alphaOrder", new Transform<String, String>() { 1659 @Override 1660 public String transform(String source) { 1661 order = 0; 1662 return source; 1663 } 1664 }); 1665 functionMap.put("transform", new Transform<String, String>() { 1666 Splitter commas = Splitter.on(',').trimResults(); 1667 1668 @Override 1669 public String transform(String source) { 1670 List<String> parts = commas.splitToList(source); 1671 return parts.get(1) 1672 + (parts.get(0).equals("both") ? "↔︎" : "→") 1673 + parts.get(2) 1674 + (parts.size() > 3 ? 
"/" + parts.get(3) : ""); 1675 } 1676 }); 1677 functionMap.put("major", new Transform<String, String>() { 1678 @Override 1679 public String transform(String source) { 1680 String major = Emoji.getMajorCategory(source); 1681 // check that result is reasonable by running through PageId. 1682 switch(major) { 1683 default: 1684 PageId pageId2 = PageId.forString(major); 1685 if (pageId2.getSectionId() != SectionId.Characters) { 1686 if (pageId2 == PageId.Symbols) { 1687 pageId2 = PageId.Symbols2; 1688 } 1689 } 1690 return pageId2.toString(); 1691 case "Smileys & People": 1692 String minorCat = Emoji.getMinorCategory(source); 1693 if (minorCat.equals("skin-tone") || minorCat.equals("hair-style")) { 1694 return PageId.Component.toString(); 1695 } else if (!minorCat.contains("face")) { 1696 return PageId.People.toString(); 1697 } else { 1698 return PageId.Smileys.toString(); 1699 } 1700 } 1701 } 1702 }); 1703 functionMap.put("minor", new Transform<String, String>() { 1704 @Override 1705 public String transform(String source) { 1706 String minorCat = Emoji.getMinorCategory(source); 1707 order = Emoji.getMinorToOrder(minorCat); 1708 return minorCat; 1709 } 1710 }); 1711 1712 } 1713 1714 private static int getIndex(String item, String[] array) { 1715 for (int i = 0; i < array.length; i++) { 1716 if (item.equals(array[i])) { 1717 return i; 1718 } 1719 } 1720 return -1; 1721 } 1722 1723 private static String getEnglishFirstLetter(String s) { 1724 String languageOnlyPart; 1725 int underscorePos = s.indexOf("_"); 1726 if (underscorePos > 0) { 1727 languageOnlyPart = s.substring(0, underscorePos); 1728 } else { 1729 languageOnlyPart = s; 1730 } 1731 return englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart).substring(0, 1).toUpperCase(); 1732 } 1733 1734 static class HyphenSplitter { 1735 String main; 1736 String extras; 1737 1738 String split(String source) { 1739 int hyphenPos = source.indexOf('-'); 1740 if (hyphenPos < 0) { 1741 main = source; 1742 extras = ""; 1743 } 
else { 1744 main = source.substring(0, hyphenPos); 1745 extras = source.substring(hyphenPos); 1746 } 1747 return main; 1748 } 1749 } 1750 1751 /** 1752 * This converts "functions", like &month, and sets the order. 1753 * 1754 * @param input 1755 * @param order 1756 * @return 1757 */ 1758 private static String fix(String input, int orderIn) { 1759 if (input.contains("")) { 1760 int debug = 0; 1761 } 1762 String oldInput = input; 1763 input = RegexLookup.replace(input, args.value); 1764 order = orderIn; 1765 suborder = null; 1766 int pos = 0; 1767 while (true) { 1768 int functionStart = input.indexOf('&', pos); 1769 if (functionStart < 0) { 1770 return input; 1771 } 1772 int functionEnd = input.indexOf('(', functionStart); 1773 int argEnd = input.indexOf(')', functionEnd); 1774 Transform<String, String> func = functionMap.get(input.substring(functionStart + 1, 1775 functionEnd)); 1776 final String arg = input.substring(functionEnd + 1, argEnd); 1777 String temp = func.transform(arg); 1778 if (temp == null) { 1779 func.transform(arg); 1780 throw new IllegalArgumentException("Function returns invalid results for «" + arg + "»."); 1781 } 1782 input = input.substring(0, functionStart) + temp + input.substring(argEnd + 1); 1783 pos = functionStart + temp.length(); 1784 } 1785 } 1786 1787 /** 1788 * Collect all the paths for a CLDRFile, and make sure that they have 1789 * cached PathHeaders 1790 * 1791 * @param file 1792 * @return immutable set of paths in the file 1793 */ 1794 public Set<String> pathsForFile(CLDRFile file) { 1795 // make sure we cache all the path headers 1796 Set<String> filePaths = CollectionUtilities.addAll(file.fullIterable().iterator(), new HashSet<String>()); 1797 for (String path : filePaths) { 1798 try { 1799 fromPath(path); // call to make sure cached 1800 } catch (Throwable t) { 1801 // ... 
some other exception 1802 } 1803 } 1804 return Collections.unmodifiableSet(filePaths); 1805 } 1806 1807 /** 1808 * Returns those regexes that were never matched. 1809 * @return 1810 */ 1811 public Set<String> getUnmatchedRegexes() { 1812 Map<String, RawData> outputUnmatched = new LinkedHashMap<String, RawData>(); 1813 lookup.getUnmatchedPatterns(matchersFound, outputUnmatched); 1814 return outputUnmatched.keySet(); 1815 } 1816 1817 public String getRegexInfo() { 1818 return lookup.toString(); 1819 } 1820 } 1821 1822 /** 1823 * Return the territory used for the title of the Metazone page in the 1824 * Survey Tool. 1825 * 1826 * @param source 1827 * @return 1828 */ 1829 public static String getMetazonePageTerritory(String source) { 1830 String result = metazoneToPageTerritory.get(source); 1831 return result == null ? "ZZ" : result; 1832 } 1833 1834 private static final List<String> COUNTS = Arrays.asList("displayName", "zero", "one", "two", "few", "many", "other", "per"); 1835 1836 private static int alphabeticCompare(String aa, String bb) { 1837 // A frozen Collator is thread-safe. 1838 return alphabetic.compare(aa, bb); 1839 } 1840 1841 public enum BaseUrl { 1842 //http://st.unicode.org/smoketest/survey?_=af&strid=55053dffac611328 1843 //http://st.unicode.org/cldr-apps/survey?_=en&strid=3cd31261bf6738e1 1844 SMOKE("http://st.unicode.org/smoketest/survey"), PRODUCTION("http://st.unicode.org/cldr-apps/survey"); 1845 final String base; 1846 1847 private BaseUrl(String url) { 1848 base = url; 1849 } 1850 } 1851 1852 /** 1853 * @deprecated, use CLDRConfig.urls().forPathHeader() instead. 1854 * @param baseUrl 1855 * @param locale 1856 * @return 1857 */ 1858 public String getUrl(BaseUrl baseUrl, String locale) { 1859 return getUrl(baseUrl.base, locale); 1860 } 1861 1862 /** 1863 * @deprecated, use CLDRConfig.urls().forPathHeader() instead. 
1864 * @param baseUrl 1865 * @param locale 1866 * @return 1867 */ 1868 public String getUrl(String baseUrl, String locale) { 1869 return getUrl(baseUrl, locale, getOriginalPath()); 1870 } 1871 1872 /** 1873 * Map http://st.unicode.org/smoketest/survey to http://st.unicode.org/smoketest etc 1874 * @param str 1875 * @return 1876 */ 1877 public static String trimLast(String str) { 1878 int n = str.lastIndexOf('/'); 1879 if (n == -1) return ""; 1880 return str.substring(0, n + 1); 1881 } 1882 1883 /** 1884 * @deprecated use CLDRConfig.urls() 1885 * @param baseUrl 1886 * @param locale 1887 * @param path 1888 * @return 1889 */ 1890 public static String getUrl(String baseUrl, String locale, String path) { 1891 return trimLast(baseUrl) + "v#/" + locale + "//" + StringId.getHexId(path); 1892 } 1893 1894 // eg http://st.unicode.org/cldr-apps/survey?_=fr&x=Locale%20Name%20Patterns 1895 /** 1896 * @deprecated use CLDRConfig.urls() 1897 * @param baseUrl 1898 * @param locale 1899 * @param subsection 1900 * @return 1901 */ 1902 public static String getPageUrl(String baseUrl, String locale, PageId subsection) { 1903 return trimLast(baseUrl) + "v#/" + locale + "/" + subsection + "/"; 1904 } 1905 1906 /** 1907 * @deprecated use CLDRConfig.urls() 1908 * @param baseUrl 1909 * @param file 1910 * @param path 1911 * @return 1912 */ 1913 public static String getLinkedView(String baseUrl, CLDRFile file, String path) { 1914 String value = file.getStringValue(path); 1915 if (value == null) { 1916 return null; 1917 } 1918 return SECTION_LINK + PathHeader.getUrl(baseUrl, file.getLocaleID(), path) + "'><em>view</em></a>"; 1919 } 1920 1921 /** 1922 * If a subdivision, return the (uppercased) territory and if suffix != null, the suffix. Otherwise return the input as is. 
1923 * @param input 1924 * @param suffix 1925 * @return 1926 */ 1927 private static String getSubdivisionsTerritory(String input, Output<String> suffix) { 1928 String theTerritory; 1929 if (StandardCodes.LstrType.subdivision.isWellFormed(input)) { 1930 int territoryEnd = input.charAt(0) < 'A' ? 3 : 2; 1931 theTerritory = input.substring(0, territoryEnd).toUpperCase(Locale.ROOT); 1932 if (suffix != null) { 1933 suffix.value = input.substring(territoryEnd); 1934 } 1935 } else { 1936 theTerritory = input; 1937 if (suffix != null) { 1938 suffix.value = ""; 1939 } 1940 } 1941 return theTerritory; 1942 } 1943 } 1944