1 package org.unicode.cldr.util; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.Collections; 8 import java.util.Comparator; 9 import java.util.HashSet; 10 import java.util.Iterator; 11 import java.util.List; 12 import java.util.Locale; 13 import java.util.Map; 14 import java.util.Set; 15 import java.util.TreeMap; 16 import java.util.TreeSet; 17 import java.util.regex.Matcher; 18 import java.util.regex.Pattern; 19 20 import com.ibm.icu.util.ICUUncheckedIOException; 21 22 public class ZoneParser { 23 static final boolean DEBUG = false; 24 25 private String version; 26 27 private Map<String, String> zone_to_country; 28 29 private Map<String, Set<String>> country_to_zoneSet; 30 31 /** 32 * @return mapping from zone id to country. If a zone has no country, then XX 33 * is used. 34 */ getZoneToCounty()35 public Map<String, String> getZoneToCounty() { 36 if (zone_to_country == null) 37 make_zone_to_country(); 38 return zone_to_country; 39 } 40 41 /** 42 * @return mapping from country to zoneid. If a zone has no country, then XX 43 * is used. 44 */ getCountryToZoneSet()45 public Map<String, Set<String>> getCountryToZoneSet() { 46 if (country_to_zoneSet == null) 47 make_zone_to_country(); 48 return country_to_zoneSet; 49 } 50 51 /** 52 * @return map from tzids to a list: latitude, longitude, country, comment?. 
+ = 53 * N or E 54 */ getZoneData()55 public Map<String, List<String>> getZoneData() { 56 if (zoneData == null) 57 makeZoneData(); 58 return zoneData; 59 } 60 getDeprecatedZoneIDs()61 public List<String> getDeprecatedZoneIDs() { 62 return Arrays.asList(FIX_DEPRECATED_ZONE_DATA); 63 } 64 65 /** 66 * 67 */ make_zone_to_country()68 private void make_zone_to_country() { 69 zone_to_country = new TreeMap<>(TZIDComparator); 70 country_to_zoneSet = new TreeMap<>(); 71 // Map aliasMap = getAliasMap(); 72 Map<String, List<String>> zoneData = getZoneData(); 73 for (String zone : zoneData.keySet()) { 74 String country = zoneData.get(zone).get(2); 75 zone_to_country.put(zone, country); 76 Set<String> s = country_to_zoneSet.get(country); 77 if (s == null) 78 country_to_zoneSet.put(country, s = new TreeSet<>()); 79 s.add(zone); 80 } 81 /* 82 * Set territories = getAvailableCodes("territory"); for (Iterator it = 83 * territories.iterator(); it.hasNext();) { String code = (String) 84 * it.next(); String[] zones = TimeZone.getAvailableIDs(code); for (int i = 85 * 0; i < zones.length; ++i) { if (aliasMap.get(zones[i]) != null) continue; 86 * zone_to_country.put(zones[i], code); } } String[] zones = 87 * TimeZone.getAvailableIDs(); for (int i = 0; i < zones.length; ++i) { if 88 * (aliasMap.get(zones[i]) != null) continue; if 89 * (zone_to_country.get(zones[i]) == null) { zone_to_country.put(zones[i], 90 * NO_COUNTRY); } } for (Iterator it = zone_to_country.keySet().iterator(); 91 * it.hasNext();) { String tzid = (String) it.next(); String country = 92 * (String) zone_to_country.get(tzid); Set s = (Set) 93 * country_to_zoneSet.get(country); if (s == null) 94 * country_to_zoneSet.put(country, s = new TreeSet()); s.add(tzid); } 95 */ 96 // protect 97 zone_to_country = Collections.unmodifiableMap(zone_to_country); 98 country_to_zoneSet = CldrUtility.protectCollection(country_to_zoneSet); 99 } 100 101 /** 102 * 103 * 104 * private Map bogusZones = null; 105 * 106 * private Map 
getAliasMap() { if (bogusZones == null) { try { bogusZones = 107 * new TreeMap(); BufferedReader in = 108 * Utility.getUTF8Data"TimeZoneAliases.txt"); while (true) { String line = 109 * in.readLine(); if (line == null) break; line = line.trim(); int pos = 110 * line.indexOf('#'); if (pos >= 0) { skippedAliases.add(line); line = 111 * line.substring(0,pos).trim(); } if (line.length() == 0) continue; List 112 * pieces = Utility.splitList(line,';', true); bogusZones.put(pieces.get(0), 113 * pieces.get(1)); } in.close(); } catch (IOException e) { throw new 114 * IllegalArgumentException("Can't find timezone aliases"); } } return 115 * bogusZones; } 116 */ 117 118 Map<String, List<String>> zoneData; 119 120 Set<String> skippedAliases = new TreeSet<>(); 121 122 /* 123 * # This file contains a table with the following columns: # 1. ISO 3166 124 * 2-character country code. See the file `iso3166.tab'. # 2. Latitude and 125 * longitude of the zone's principal location # in ISO 6709 126 * sign-degrees-minutes-seconds format, # either +-DDMM+-DDDMM or 127 * +-DDMMSS+-DDDMMSS, # first latitude (+ is north), then longitude (+ is 128 * east). # 3. Zone name used in value of TZ environment variable. # 4. 129 * Comments; present if and only if the country has multiple rows. # # Columns 130 * are separated by a single tab. 
131 */ parseYear(String year, int defaultValue)132 static int parseYear(String year, int defaultValue) { 133 if ("only".startsWith(year)) 134 return defaultValue; 135 if ("minimum".startsWith(year)) 136 return Integer.MIN_VALUE; 137 if ("maximum".startsWith(year)) 138 return Integer.MAX_VALUE; 139 return Integer.parseInt(year); 140 } 141 142 public static class Time { 143 public int seconds; 144 public byte type; 145 static final byte WALL = 0, STANDARD = 1, UNIVERSAL = 2; 146 Time(String in)147 Time(String in) { 148 if (in.equals("-")) return; // zero/WALL is the default 149 char suffix = in.charAt(in.length() - 1); 150 switch (suffix) { 151 case 'w': 152 in = in.substring(0, in.length() - 1); 153 break; 154 case 's': 155 in = in.substring(0, in.length() - 1); 156 type = STANDARD; 157 break; 158 case 'u': 159 case 'g': 160 case 'z': 161 in = in.substring(0, in.length() - 1); 162 type = UNIVERSAL; 163 break; 164 } 165 seconds = parseSeconds(in, false); 166 } 167 parseSeconds(String in, boolean allowNegative)168 public static int parseSeconds(String in, boolean allowNegative) { 169 boolean negative = false; 170 if (in.startsWith("-")) { 171 assert (allowNegative); 172 negative = true; 173 in = in.substring(1); 174 } 175 String[] pieces = in.split(":"); 176 int multiplier = 3600; 177 int result = 0; 178 for (int i = 0; i < pieces.length; ++i) { 179 result += multiplier * Integer.parseInt(pieces[i]); 180 multiplier /= 60; 181 assert (multiplier >= 0); 182 } 183 if (negative) result = -result; 184 return result; 185 } 186 187 @Override toString()188 public String toString() { 189 return BoilerplateUtilities.toStringHelper(this); 190 } 191 } 192 193 static final String[] months = { "january", "february", "march", "april", "may", "june", "july", "august", 194 "september", "october", "november", "december" }; 195 static final String[] weekdays = { "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday" }; 196 findStartsWith(String value, String[] 
array, boolean exact)197 static int findStartsWith(String value, String[] array, boolean exact) { 198 value = value.toLowerCase(Locale.ENGLISH); 199 for (int i = 0; i < array.length; ++i) { 200 if (array[i].startsWith(value)) return i; 201 } 202 throw new IllegalArgumentException("Can't find " + value + " in " + Arrays.asList(months)); 203 } 204 205 static Pattern dayPattern = PatternCache.get("([0-9]+)|(last)([a-z]+)|([a-z]+)([<=>]+)([0-9]+)"); 206 static final String[] relations = { "<=", ">=" }; 207 208 public static class Day implements Comparable<Object> { 209 public int number; 210 public byte relation; 211 public int weekDay; 212 static final byte NONE = 0, LEQ = 2, GEQ = 4; 213 Day(String value)214 Day(String value) { 215 value = value.toLowerCase(); 216 Matcher matcher = dayPattern.matcher(value); 217 if (!matcher.matches()) { 218 throw new IllegalArgumentException(); 219 } 220 if (matcher.group(1) != null) { 221 number = Integer.parseInt(matcher.group(1)); 222 return; 223 } 224 if (matcher.group(2) != null) { 225 weekDay = findStartsWith(matcher.group(3), weekdays, false); 226 number = 31; 227 relation = LEQ; 228 return; 229 } 230 if (matcher.group(4) != null) { 231 weekDay = findStartsWith(matcher.group(4), weekdays, false); 232 relation = (byte) findStartsWith(matcher.group(5), relations, false); 233 number = Integer.parseInt(matcher.group(6)); 234 return; 235 } 236 throw new IllegalArgumentException(); 237 } 238 239 @Override toString()240 public String toString() { 241 return BoilerplateUtilities.toStringHelper(this); 242 } 243 244 @Override compareTo(Object other)245 public int compareTo(Object other) { 246 return toString().compareTo(other.toString()); 247 } 248 } 249 250 /** 251 * 252 A rule line has the form 253 * 254 * Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S 255 * 256 * For example: 257 * 258 * Rule US 1967 1973 - Apr lastSun 2:00 1:00 D 259 * 260 * The fields that make up a rule line are: 261 * 262 * NAME Gives the (arbitrary) name of the 
set of rules this 263 * rule is part of. 264 * 265 * FROM Gives the first year in which the rule applies. Any 266 * integer year can be supplied; the Gregorian calendar 267 * is assumed. The word minimum (or an abbreviation) 268 * means the minimum year representable as an integer. 269 * The word maximum (or an abbreviation) means the 270 * maximum year representable as an integer. Rules can 271 * describe times that are not representable as time 272 * values, with the unrepresentable times ignored; this 273 * allows rules to be portable among hosts with 274 * differing time value types. 275 * 276 * TO Gives the final year in which the rule applies. In 277 * addition to minimum and maximum (as above), the word 278 * only (or an abbreviation) may be used to repeat the 279 * value of the FROM field. 280 * 281 * TYPE Gives the type of year in which the rule applies. 282 * If TYPE is - then the rule applies in all years 283 * between FROM and TO inclusive. If TYPE is something 284 * else, then zic executes the command 285 * yearistype year type 286 * to check the type of a year: an exit status of zero 287 * is taken to mean that the year is of the given type; 288 * an exit status of one is taken to mean that the year 289 * is not of the given type. 290 * 291 * IN Names the month in which the rule takes effect. 292 * Month names may be abbreviated. 293 * 294 * ON Gives the day on which the rule takes effect. 295 * Recognized forms include: 296 * 297 * 5 the fifth of the month 298 * lastSun the last Sunday in the month 299 * lastMon the last Monday in the month 300 * Sun>=8 first Sunday on or after the eighth 301 * Sun<=25 last Sunday on or before the 25th 302 * 303 * Names of days of the week may be abbreviated or 304 * spelled out in full. Note that there must be no 305 * spaces within the ON field. 306 * 307 * AT Gives the time of day at which the rule takes 308 * effect. 
Recognized forms include: 309 * 310 * 2 time in hours 311 * 2:00 time in hours and minutes 312 * 15:00 24-hour format time (for times after noon) 313 * 1:28:14 time in hours, minutes, and seconds 314 * - equivalent to 0 315 * 316 * where hour 0 is midnight at the start of the day, 317 * and hour 24 is midnight at the end of the day. Any 318 * of these forms may be followed by the letter w if 319 * the given time is local "wall clock" time, s if the 320 * given time is local "standard" time, or u (or g or 321 * z) if the given time is universal time; in the 322 * absence of an indicator, wall clock time is assumed. 323 *** cannot be negative 324 * 325 * SAVE Gives the amount of time to be added to local 326 * standard time when the rule is in effect. This 327 * field has the same format as the AT field (although, 328 * of course, the w and s suffixes are not used). 329 *** can be positive or negative 330 * 331 * LETTER/S 332 * Gives the "variable part" (for example, the "S" or 333 * "D" in "EST" or "EDT") of time zone abbreviations to 334 * be used when this rule is in effect. If this field 335 * is -, the variable part is null. 
336 * 337 * 338 * 339 */ 340 341 public static class RuleLine { 342 public static Set<String> types = new TreeSet<>(); 343 public static Set<Day> days = new TreeSet<>(); 344 static Set<Integer> saves = new TreeSet<>(); 345 RuleLine(List<String> l)346 RuleLine(List<String> l) { 347 fromYear = parseYear(l.get(0), 0); 348 toYear = parseYear(l.get(1), fromYear); 349 type = l.get(2); 350 if (type.equals("-")) type = null; 351 month = 1 + findStartsWith(l.get(3), months, false); 352 day = new Day(l.get(4)); 353 time = new Time(l.get(5)); 354 save = Time.parseSeconds(l.get(6), true); 355 letter = l.get(7); 356 if (letter.equals("-")) letter = null; 357 if (type != null) types.add(type); 358 days.add(day); 359 } 360 361 @Override toString()362 public String toString() { 363 return BoilerplateUtilities.toStringHelper(this); 364 } 365 366 public int fromYear; 367 368 public int toYear; 369 370 public String type; 371 372 public int month; 373 374 public Day day; 375 376 public Time time; 377 378 public int save; 379 380 public String letter; 381 382 public static final int FIELD_COUNT = 8; // excluding Rule, Name 383 } 384 385 /** 386 * A zone line has the form 387 * 388 * Zone NAME GMTOFF RULES/SAVE FORMAT [UNTIL] 389 * 390 * For example: 391 * 392 * Zone Australia/Adelaide 9:30 Aus CST 1971 Oct 31 2:00 393 * 394 * The fields that make up a zone line are: 395 * 396 * NAME The name of the time zone. This is the name used in 397 * creating the time conversion information file for the 398 * zone. 399 * 400 * GMTOFF 401 * The amount of time to add to UTC to get standard time 402 * in this zone. This field has the same format as the 403 * AT and SAVE fields of rule lines; begin the field with 404 * a minus sign if time must be subtracted from UTC. 405 * 406 * RULES/SAVE 407 * The name of the rule(s) that apply in the time zone 408 * or, alternately, an amount of time to add to local 409 * standard time. If this field is - then standard time 410 * always applies in the time zone. 
411 * 412 * FORMAT 413 * The format for time zone abbreviations in this time 414 * zone. The pair of characters %s is used to show where 415 * the "variable part" of the time zone abbreviation 416 * goes. Alternately, a slash (/) separates standard and 417 * daylight abbreviations. 418 * 419 * UNTIL The time at which the UTC offset or the rule(s) change 420 * for a location. It is specified as a year, a month, a 421 * day, and a time of day. If this is specified, the 422 * time zone information is generated from the given UTC 423 * offset and rule change until the time specified. The 424 * month, day, and time of day have the same format as 425 * the IN, ON, and AT columns of a rule; trailing columns 426 * can be omitted, and default to the earliest possible 427 * value for the missing columns. 428 * 429 * The next line must be a "continuation" line; this has 430 * the same form as a zone line except that the string 431 * "Zone" and the name are omitted, as the continuation 432 * line will place information starting at the time 433 * specified as the UNTIL field in the previous line in 434 * the file used by the previous line. Continuation 435 * lines may contain an UNTIL field, just as zone lines 436 * do, indicating that the next line is a further 437 * continuation. 
438 */ 439 public static class ZoneLine { 440 public static Set<Day> untilDays = new TreeSet<>(); 441 public static Set<String> rulesSaves = new TreeSet<>(); 442 ZoneLine(List<String> l)443 ZoneLine(List<String> l) { 444 gmtOff = Time.parseSeconds(l.get(0), true); 445 rulesSave = l.get(1); 446 if (rulesSave.equals("-")) 447 rulesSave = "0"; 448 else if (rulesSave.charAt(0) < 'A') rulesSave = "" + Time.parseSeconds(rulesSave, false); 449 450 format = l.get(2); 451 switch (l.size()) { 452 case 7: 453 untilTime = new Time(l.get(6)); // fall through 454 case 6: 455 untilDay = new Day(l.get(5)); // fall through 456 untilDays.add(untilDay); 457 case 5: 458 untilMonth = 1 + findStartsWith(l.get(4), months, false); // fall through 459 case 4: 460 untilYear = parseYear(l.get(3), Integer.MAX_VALUE); // fall through 461 case 3: 462 break; // ok 463 default: 464 throw new IllegalArgumentException("Wrong field count: " + l); 465 } 466 rulesSaves.add(rulesSave); 467 } 468 469 @Override toString()470 public String toString() { 471 return BoilerplateUtilities.toStringHelper(this); 472 } 473 474 public int gmtOff; 475 476 public String rulesSave; 477 478 public String format; 479 480 public int untilYear = Integer.MAX_VALUE; // indicating continuation 481 482 public int untilMonth; 483 484 public Day untilDay; 485 486 public Time untilTime; 487 488 public String comment; 489 490 public static final int FIELD_COUNT = 3; // excluding Zone, Name 491 492 public static final int FIELD_COUNT_UNTIL = 7; // excluding Zone, Name 493 } 494 495 Map<String, List<RuleLine>> ruleID_rules = new TreeMap<>(); 496 497 Map<String, List<ZoneLine>> zone_rules = new TreeMap<>(); 498 499 Map<String, String> linkold_new = new TreeMap<>(); 500 501 Map<String, Set<String>> linkNew_oldSet = new TreeMap<>(); 502 503 public class Transition { 504 public long date; 505 public long offset; 506 public String abbreviation; 507 } 508 509 public class TransitionList { 510 addTransitions(ZoneLine lastZoneLine, 
ZoneLine zoneLine, int startYear, int endYear)511 void addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear) { 512 // add everything between the zonelines 513 if (lastZoneLine == null) { 514 return; 515 } 516 startYear = Math.max(startYear, lastZoneLine.untilYear); 517 endYear = Math.min(endYear, zoneLine.untilYear); 518 int gmtOffset = lastZoneLine.gmtOff; 519 for (int year = startYear; year <= endYear; ++year) { 520 resolveTime(gmtOffset, lastZoneLine.untilYear, lastZoneLine.untilMonth, 521 lastZoneLine.untilDay, lastZoneLine.untilTime); 522 } 523 } 524 resolveTime(int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime)525 private long resolveTime(int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime) { 526 return 0; 527 } 528 } 529 getTransitions(String zoneID, int startYear, int endYear)530 public TransitionList getTransitions(String zoneID, int startYear, int endYear) { 531 TransitionList results = new TransitionList(); 532 List<ZoneLine> rules = zone_rules.get(zoneID); 533 ZoneLine lastZoneLine = null; 534 for (ZoneLine zoneLine : rules) { 535 results.addTransitions(lastZoneLine, zoneLine, startYear, endYear); 536 lastZoneLine = zoneLine; 537 } 538 return results; 539 } 540 getTZIDComparator()541 public Comparator<String> getTZIDComparator() { 542 return TZIDComparator; 543 } 544 545 private static List<String> errorData = Arrays.asList(new String[] { 546 new Double(Double.MIN_VALUE).toString(), new Double(Double.MIN_VALUE).toString(), "" }); 547 548 private Comparator<String> TZIDComparator = new Comparator<>() { 549 Map<String, List<String>> data = getZoneData(); 550 551 @Override 552 public int compare(String s1, String s2) { 553 List<String> data1 = getData(s1); 554 List<String> data2 = getData(s2); 555 int result; 556 // country 557 String country1 = data1.get(2); 558 String country2 = data2.get(2); 559 560 if ((result = country1.compareTo(country2)) != 0) 561 return result; 562 // 
longitude 563 Double d1 = Double.valueOf(data1.get(1)); 564 Double d2 = Double.valueOf(data2.get(1)); 565 if ((result = d1.compareTo(d2)) != 0) 566 return result; 567 // latitude 568 d1 = Double.valueOf(data1.get(0)); 569 d2 = Double.valueOf(data2.get(0)); 570 if ((result = d1.compareTo(d2)) != 0) 571 return result; 572 // name 573 return s1.compareTo(s2); 574 } 575 576 /** 577 * Get timezone data for the given location 578 * Include work-arounds for missing time zones 579 * 580 * @param s the string like "Australia/Currie" 581 * @return a list of 4 strings for latitude, longitude, country, city 582 * 583 * Reference: https://unicode-org.atlassian.net/browse/CLDR-14428 584 */ 585 private List<String> getData(String s) { 586 List<String> d = data.get(s); 587 if (d == null) { 588 String sNew = linkold_new.get(s); 589 if (sNew != null) { 590 d = data.get(sNew); 591 } 592 if (d == null) { 593 d = errorData; 594 } 595 } 596 return d; 597 } 598 }; 599 600 public static MapComparator<String> regionalCompare = new MapComparator<>(); 601 static { 602 regionalCompare.add("America"); 603 regionalCompare.add("Atlantic"); 604 regionalCompare.add("Europe"); 605 regionalCompare.add("Africa"); 606 regionalCompare.add("Asia"); 607 regionalCompare.add("Indian"); 608 regionalCompare.add("Australia"); 609 regionalCompare.add("Pacific"); 610 regionalCompare.add("Arctic"); 611 regionalCompare.add("Antarctica"); 612 regionalCompare.add("Etc"); 613 } 614 615 private static String[] TZFiles = { "africa", "antarctica", "asia", 616 "australasia", "backward", "etcetera", "europe", "northamerica", 617 "southamerica" }; 618 619 private static Map<String, String> FIX_UNSTABLE_TZIDS; 620 621 private static Set<String> SKIP_LINKS = new HashSet<>(Arrays.asList( 622 new String[] { 623 "America/Montreal", "America/Toronto", 624 "America/Santa_Isabel", "America/Tijuana" })); 625 626 private static Set<String> PREFERRED_BASES = new HashSet<>(Arrays.asList(new String[] { "Europe/London" })); 627 628 
/**
 * Pairs of zone IDs that are added to the link equivalence classes in
 * addition to the Link lines read from the data files.
 */
private static String[][] ADD_ZONE_ALIASES_DATA = {
    { "Etc/UCT", "Etc/UTC" },

    { "EST", "Etc/GMT+5" },
    { "MST", "Etc/GMT+7" },
    { "HST", "Etc/GMT+10" },

    { "SystemV/AST4", "Etc/GMT+4" },
    { "SystemV/EST5", "Etc/GMT+5" },
    { "SystemV/CST6", "Etc/GMT+6" },
    { "SystemV/MST7", "Etc/GMT+7" },
    { "SystemV/PST8", "Etc/GMT+8" },
    { "SystemV/YST9", "Etc/GMT+9" },
    { "SystemV/HST10", "Etc/GMT+10" },
};

/** The zone IDs reported by getDeprecatedZoneIDs(). */
static String[] FIX_DEPRECATED_ZONE_DATA = {
    "Africa/Timbuktu",
    "America/Argentina/ComodRivadavia",
    "America/Santa_Isabel",
    "Europe/Belfast",
    "Pacific/Yap",
    "Antarctica/South_Pole",
    "America/Shiprock",
    "America/Montreal",
    "Asia/Chongqing",
    "Asia/Harbin",
    "Asia/Kashgar"
};

static {
    // Each row is { <new name>, <old name> }; the table is handed to
    // CldrUtility.asMap to build the lookup used while parsing.
    String[][] newAndOldNames = {
        { "America/Atikokan", "America/Coral_Harbour" },
        { "America/Argentina/Buenos_Aires", "America/Buenos_Aires" },
        { "America/Argentina/Catamarca", "America/Catamarca" },
        { "America/Argentina/Cordoba", "America/Cordoba" },
        { "America/Argentina/Jujuy", "America/Jujuy" },
        { "America/Argentina/Mendoza", "America/Mendoza" },
        { "America/Nuuk", "America/Godthab" },
        { "America/Kentucky/Louisville", "America/Louisville" },
        { "America/Indiana/Indianapolis", "America/Indianapolis" },
        { "Africa/Asmara", "Africa/Asmera" },
        { "Atlantic/Faroe", "Atlantic/Faeroe" },
        { "Asia/Kolkata", "Asia/Calcutta" },
        { "Asia/Ho_Chi_Minh", "Asia/Saigon" },
        { "Asia/Yangon", "Asia/Rangoon" },
        { "Asia/Kathmandu", "Asia/Katmandu" },
        { "Europe/Kyiv", "Europe/Kiev" },
        { "Pacific/Pohnpei", "Pacific/Ponape" },
        { "Pacific/Chuuk", "Pacific/Truk" },
        { "Pacific/Honolulu", "Pacific/Johnston" }
    };
    FIX_UNSTABLE_TZIDS = CldrUtility.asMap(newAndOldNames);
}

// CLDR canonical zone IDs removed from zone.tab are defined here.
684 // When these zones are deprecated in CLDR, remove them from this array. 685 // See CLDR-16049 686 static final String[][] SUPPLEMENTAL_ZONE_ID_DATA = { 687 {"Europe/Uzhgorod", "UA", "+4837+02218"}, // 2022d 688 {"Europe/Zaporozhye", "UA", "+4750+03510"}, // 2022d 689 {"America/Nipigon", "CA", "+4901-08816"}, // 2022f 690 {"America/Rainy_River", "CA", "+4843-09434"}, // 2022f 691 {"America/Thunder_Bay", "CA", "+4823-08915"}, // 2022f 692 {"America/Pangnirtung", "CA", "+6608-06544"}, // 2022g 693 }; 694 695 /** 696 * 697 */ makeZoneData()698 private void makeZoneData() { 699 try { 700 // get version 701 BufferedReader versionIn = CldrUtility.getUTF8Data("tzdb-version.txt"); 702 version = versionIn.readLine(); 703 if (!version.matches("[0-9]{4}[a-z]")) { 704 throw new IllegalArgumentException(String.format("Bad Version number: %s, should be of the form 2007x", 705 version)); 706 } 707 versionIn.close(); 708 709 // String deg = "([+-][0-9]+)";// 710 String deg = "([+-])([0-9][0-9][0-9]?)([0-9][0-9])([0-9][0-9])?";// 711 Matcher m = PatternCache.get(deg + deg).matcher(""); 712 zoneData = new TreeMap<>(); 713 BufferedReader in = CldrUtility.getUTF8Data("zone.tab"); 714 while (true) { 715 String line = in.readLine(); 716 if (line == null) 717 break; 718 line = line.trim(); 719 int pos = line.indexOf('#'); 720 if (pos >= 0) { 721 skippedAliases.add(line); 722 line = line.substring(0, pos).trim(); 723 } 724 if (line.length() == 0) 725 continue; 726 List<String> pieces = CldrUtility.splitList(line, '\t', true); 727 String country = pieces.get(0); 728 String latLong = pieces.get(1); 729 String tzid = pieces.get(2); 730 String ntzid = FIX_UNSTABLE_TZIDS.get(tzid); 731 if (ntzid != null) 732 tzid = ntzid; 733 String comment = pieces.size() < 4 ? 
null : (String) pieces.get(3); 734 pieces.clear(); 735 if (!m.reset(latLong).matches()) 736 throw new IllegalArgumentException("Bad zone.tab, lat/long format: " 737 + line); 738 739 pieces.add(getDegrees(m, true).toString()); 740 pieces.add(getDegrees(m, false).toString()); 741 pieces.add(country); 742 if (comment != null) 743 pieces.add(comment); 744 if (zoneData.containsKey(tzid)) 745 throw new IllegalArgumentException("Bad zone.tab, duplicate entry: " 746 + line); 747 zoneData.put(tzid, pieces); 748 } 749 in.close(); 750 // add Etcs 751 for (int i = -14; i <= 12; ++i) { 752 List<String> pieces = new ArrayList<>(); 753 int latitude = 0; 754 int longitude = i * 15; 755 if (longitude <= -180) { 756 longitude += 360; 757 } 758 pieces.add(new Double(latitude).toString()); // lat 759 // remember that the sign of the TZIDs is wrong 760 pieces.add(new Double(-longitude).toString()); // long 761 pieces.add(StandardCodes.NO_COUNTRY); // country 762 763 zoneData.put("Etc/GMT" + (i == 0 ? "" : i < 0 ? 
"" + i : "+" + i), 764 pieces); 765 } 766 // add Unknown / UTC 767 List<String> pieces = new ArrayList<>(); 768 pieces.add(new Double(0).toString()); // lat 769 pieces.add(new Double(0).toString()); // long 770 pieces.add(StandardCodes.NO_COUNTRY); // country 771 zoneData.put("Etc/Unknown", pieces); 772 zoneData.put("Etc/UTC", pieces); 773 774 // add extra zones 775 for (String[] zoneEntry : SUPPLEMENTAL_ZONE_ID_DATA) { 776 List<String> zarray = new ArrayList<>(); 777 if (!m.reset(zoneEntry[2]).matches()) { 778 throw new IllegalArgumentException("Bad zone.tab, lat/long format: " + zoneEntry[2]); 779 } 780 zarray.add(getDegrees(m, true).toString()); 781 zarray.add(getDegrees(m, false).toString()); 782 zarray.add(zoneEntry[1]); 783 zoneData.put(zoneEntry[0], zarray); 784 } 785 786 zoneData = CldrUtility.protectCollection(zoneData); // protect for later 787 788 // now get links 789 Pattern whitespace = PatternCache.get("\\s+"); 790 XEquivalenceClass<String, String> linkedItems = new XEquivalenceClass<>("None"); 791 for (int i = 0; i < TZFiles.length; ++i) { 792 in = CldrUtility.getUTF8Data(TZFiles[i]); 793 String zoneID = null; 794 while (true) { 795 String line = in.readLine(); 796 if (line == null) 797 break; 798 String originalLine = line; 799 int commentPos = line.indexOf("#"); 800 String comment = null; 801 if (commentPos >= 0) { 802 comment = line.substring(commentPos + 1).trim(); 803 line = line.substring(0, commentPos); 804 } 805 line = line.trim(); 806 if (line.length() == 0) 807 continue; 808 String[] items = whitespace.split(line); 809 if (zoneID != null || items[0].equals("Zone")) { 810 List<String> l = new ArrayList<>(); 811 l.addAll(Arrays.asList(items)); 812 813 // Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01 814 // 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time 815 if (zoneID == null) { 816 l.remove(0); // "Zone" 817 zoneID = l.get(0); 818 String ntzid = FIX_UNSTABLE_TZIDS.get(zoneID); 819 if (ntzid != null) 820 zoneID = ntzid; 821 l.remove(0); 822 
} 823 List<ZoneLine> zoneRules = zone_rules.get(zoneID); 824 if (zoneRules == null) { 825 zoneRules = new ArrayList<>(); 826 zone_rules.put(zoneID, zoneRules); 827 } 828 829 if (l.size() < ZoneLine.FIELD_COUNT 830 || l.size() > ZoneLine.FIELD_COUNT_UNTIL) { 831 System.out.println("***Zone incorrect field count:"); 832 System.out.println(l); 833 System.out.println(originalLine); 834 } 835 836 ZoneLine zoneLine = new ZoneLine(l); 837 zoneLine.comment = comment; 838 zoneRules.add(zoneLine); 839 if (l.size() == ZoneLine.FIELD_COUNT) { 840 zoneID = null; // no continuation line 841 } 842 } else if (items[0].equals("Rule")) { 843 // # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S 844 // Rule Algeria 1916 only - Jun 14 23:00s 1:00 S 845 846 String ruleID = items[1]; 847 List<RuleLine> ruleList = ruleID_rules.get(ruleID); 848 if (ruleList == null) { 849 ruleList = new ArrayList<>(); 850 ruleID_rules.put(ruleID, ruleList); 851 } 852 List<String> l = new ArrayList<>(); 853 l.addAll(Arrays.asList(items)); 854 l.remove(0); 855 l.remove(0); 856 if (l.size() != RuleLine.FIELD_COUNT) { 857 System.out.println("***Rule incorrect field count:"); 858 System.out.println(l); 859 } 860 if (comment != null) 861 l.add(comment); 862 RuleLine ruleLine = new RuleLine(l); 863 ruleList.add(ruleLine); 864 865 } else if (items[0].equals("Link")) { 866 String old = items[2]; 867 String newOne = items[1]; 868 if (!(SKIP_LINKS.contains(old) && SKIP_LINKS.contains(newOne))) { 869 //System.out.println("Original " + old + "\t=>\t" + newOne); 870 linkedItems.add(old, newOne); 871 } 872 /* 873 * String conflict = (String) linkold_new.get(old); if (conflict != 874 * null) { System.out.println("Conflict with old: " + old + " => " + 875 * conflict + ", " + newOne); } System.out.println(old + "\t=>\t" + 876 * newOne); linkold_new.put(old, newOne); 877 */ 878 } else { 879 if (DEBUG) 880 System.out.println("Unknown zone line: " + line); 881 } 882 } 883 in.close(); 884 } 885 // add in stuff that should be 
links 886 for (int i = 0; i < ADD_ZONE_ALIASES_DATA.length; ++i) { 887 linkedItems.add(ADD_ZONE_ALIASES_DATA[i][0], 888 ADD_ZONE_ALIASES_DATA[i][1]); 889 } 890 891 Set<String> isCanonical = zoneData.keySet(); 892 893 // walk through the sets, and 894 // if any set contains two canonical items, split it. 895 // if any contains one, make it the primary 896 // if any contains zero, problem! 897 for (Set<String> equivalents : linkedItems.getEquivalenceSets()) { 898 Set<String> canonicals = new TreeSet<>(equivalents); 899 canonicals.retainAll(isCanonical); 900 if (canonicals.size() == 0) 901 throw new IllegalArgumentException("No canonicals in: " + equivalents); 902 if (canonicals.size() > 1) { 903 if (DEBUG) { 904 System.out.println("Too many canonicals in: " + equivalents); 905 System.out 906 .println("\t*Don't* put these into the same equivalence class: " 907 + canonicals); 908 } 909 Set<String> remainder = new TreeSet<>(equivalents); 910 remainder.removeAll(isCanonical); 911 if (remainder.size() != 0) { 912 if (DEBUG) { 913 System.out 914 .println("\tThe following should be equivalent to others: " 915 + remainder); 916 } 917 } 918 } 919 { 920 String newOne; 921 // get the item that we want to hang all the aliases off of. 
922 // normally this is the first (alphabetically) one, but 923 // it may be overridden with PREFERRED_BASES 924 Set<String> preferredItems = new HashSet<>(PREFERRED_BASES); 925 preferredItems.retainAll(canonicals); 926 if (preferredItems.size() > 0) { 927 newOne = preferredItems.iterator().next(); 928 } else { 929 newOne = canonicals.iterator().next(); 930 } 931 for (String oldOne : equivalents) { 932 if (canonicals.contains(oldOne)) 933 continue; 934 // System.out.println("Mapping " + oldOne + "\t=>\t" + newOne); 935 linkold_new.put(oldOne, newOne); 936 } 937 } 938 } 939 940 /* 941 * // fix the links from old to new, to remove chains for (Iterator it = 942 * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem = 943 * it.next(); Object newItem = linkold_new.get(oldItem); while (true) { 944 * Object linkItem = linkold_new.get(newItem); if (linkItem == null) 945 * break; if (true) System.out.println("Connecting link chain: " + oldItem + 946 * "\t=> " + newItem + "\t=> " + linkItem); newItem = linkItem; 947 * linkold_new.put(oldItem, newItem); } } 948 * // reverse the links *from* canonical names for (Iterator it = 949 * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem = 950 * it.next(); if (!isCanonical.contains(oldItem)) continue; Object newItem = 951 * linkold_new.get(oldItem); } 952 * 953 * // fix unstable TZIDs Set itemsToRemove = new HashSet(); Map 954 * itemsToAdd = new HashMap(); for (Iterator it = 955 * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem = 956 * it.next(); Object newItem = linkold_new.get(oldItem); Object modOldItem = 957 * RESTORE_UNSTABLE_TZIDS.get(oldItem); Object modNewItem = 958 * FIX_UNSTABLE_TZIDS.get(newItem); if (modOldItem == null && modNewItem == 959 * null) continue; if (modOldItem == null) { // just fix old entry 960 * itemsToAdd.put(oldItem, modNewItem); continue; } // otherwise have to 961 * nuke and redo itemsToRemove.add(oldItem); if (modNewItem == null) 962 * modNewItem = newItem; 
itemsToAdd.put(modOldItem, modNewItem); } // now 963 * make fixes (we couldn't earlier because we were iterating 964 * Utility.removeAll(linkold_new, itemsToRemove); 965 * linkold_new.putAll(itemsToAdd); 966 * // now remove all links that are from canonical zones 967 * Utility.removeAll(linkold_new, zoneData.keySet()); 968 */ 969 970 // generate list of new to old 971 for (Iterator<String> it = linkold_new.keySet().iterator(); it.hasNext();) { 972 String oldZone = it.next(); 973 String newZone = linkold_new.get(oldZone); 974 Set<String> s = linkNew_oldSet.get(newZone); 975 if (s == null) 976 linkNew_oldSet.put(newZone, s = new HashSet<>()); 977 s.add(oldZone); 978 } 979 980 // PROTECT EVERYTHING 981 linkNew_oldSet = CldrUtility.protectCollection(linkNew_oldSet); 982 linkold_new = CldrUtility.protectCollection(linkold_new); 983 ruleID_rules = CldrUtility.protectCollection(ruleID_rules); 984 zone_rules = CldrUtility.protectCollection(zone_rules); 985 // TODO protect zone info later 986 } catch (IOException e) { 987 throw new ICUUncheckedIOException( 988 "Can't find timezone aliases: " + e.toString(), e); 989 } 990 } 991 992 /** 993 * @param m 994 */ 995 private Double getDegrees(Matcher m, boolean lat) { 996 int startIndex = lat ? 1 : 5; 997 double amount = Integer.parseInt(m.group(startIndex + 1)) 998 + Integer.parseInt(m.group(startIndex + 2)) / 60.0; 999 if (m.group(startIndex + 3) != null) 1000 amount += Integer.parseInt(m.group(startIndex + 3)) / 3600.0; 1001 if (m.group(startIndex).equals("-")) 1002 amount = -amount; 1003 return new Double(amount); 1004 } 1005 1006 /** 1007 * @return Returns the linkold_new. 1008 */ 1009 public Map<String, String> getZoneLinkold_new() { 1010 getZoneData(); 1011 return linkold_new; 1012 } 1013 1014 /** 1015 * @return Returns the linkold_new. 1016 */ 1017 public Map<String, Set<String>> getZoneLinkNew_OldSet() { 1018 getZoneData(); 1019 return linkNew_oldSet; 1020 } 1021 1022 /** 1023 * @return Returns the ruleID_rules. 
1024 */ 1025 public Map<String, List<RuleLine>> getZoneRuleID_rules() { 1026 getZoneData(); 1027 return ruleID_rules; 1028 } 1029 1030 /** 1031 * @return Returns the zone_rules. 1032 */ 1033 public Map<String, List<ZoneLine>> getZone_rules() { 1034 getZoneData(); 1035 return zone_rules; 1036 } 1037 1038 public String getVersion() { 1039 return version; 1040 } 1041 1042 } 1043