• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collections;
8 import java.util.Comparator;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Locale;
13 import java.util.Map;
14 import java.util.Set;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 
20 import com.ibm.icu.util.ICUUncheckedIOException;
21 
22 public class ZoneParser {
23     static final boolean DEBUG = false;
24 
25     private String version;
26 
27     private Map<String, String> zone_to_country;
28 
29     private Map<String, Set<String>> country_to_zoneSet;
30 
31     /**
32      * @return mapping from zone id to country. If a zone has no country, then XX
33      *         is used.
34      */
getZoneToCounty()35     public Map<String, String> getZoneToCounty() {
36         if (zone_to_country == null)
37             make_zone_to_country();
38         return zone_to_country;
39     }
40 
41     /**
42      * @return mapping from country to zoneid. If a zone has no country, then XX
43      *         is used.
44      */
getCountryToZoneSet()45     public Map<String, Set<String>> getCountryToZoneSet() {
46         if (country_to_zoneSet == null)
47             make_zone_to_country();
48         return country_to_zoneSet;
49     }
50 
51     /**
52      * @return map from tzids to a list: latitude, longitude, country, comment?. + =
53      *         N or E
54      */
getZoneData()55     public Map<String, List<String>> getZoneData() {
56         if (zoneData == null)
57             makeZoneData();
58         return zoneData;
59     }
60 
getDeprecatedZoneIDs()61     public List<String> getDeprecatedZoneIDs() {
62         return Arrays.asList(FIX_DEPRECATED_ZONE_DATA);
63     }
64 
65     /**
66      *
67      */
make_zone_to_country()68     private void make_zone_to_country() {
69         zone_to_country = new TreeMap<>(TZIDComparator);
70         country_to_zoneSet = new TreeMap<>();
71         // Map aliasMap = getAliasMap();
72         Map<String, List<String>> zoneData = getZoneData();
73         for (String zone : zoneData.keySet()) {
74             String country = zoneData.get(zone).get(2);
75             zone_to_country.put(zone, country);
76             Set<String> s = country_to_zoneSet.get(country);
77             if (s == null)
78                 country_to_zoneSet.put(country, s = new TreeSet<>());
79             s.add(zone);
80         }
81         /*
82          * Set territories = getAvailableCodes("territory"); for (Iterator it =
83          * territories.iterator(); it.hasNext();) { String code = (String)
84          * it.next(); String[] zones = TimeZone.getAvailableIDs(code); for (int i =
85          * 0; i < zones.length; ++i) { if (aliasMap.get(zones[i]) != null) continue;
86          * zone_to_country.put(zones[i], code); } } String[] zones =
87          * TimeZone.getAvailableIDs(); for (int i = 0; i < zones.length; ++i) { if
88          * (aliasMap.get(zones[i]) != null) continue; if
89          * (zone_to_country.get(zones[i]) == null) { zone_to_country.put(zones[i],
90          * NO_COUNTRY); } } for (Iterator it = zone_to_country.keySet().iterator();
91          * it.hasNext();) { String tzid = (String) it.next(); String country =
92          * (String) zone_to_country.get(tzid); Set s = (Set)
93          * country_to_zoneSet.get(country); if (s == null)
94          * country_to_zoneSet.put(country, s = new TreeSet()); s.add(tzid); }
95          */
96         // protect
97         zone_to_country = Collections.unmodifiableMap(zone_to_country);
98         country_to_zoneSet = CldrUtility.protectCollection(country_to_zoneSet);
99     }
100 
101     /**
102      *
103      *
104      * private Map bogusZones = null;
105      *
106      * private Map getAliasMap() { if (bogusZones == null) { try { bogusZones =
107      * new TreeMap(); BufferedReader in =
108      * Utility.getUTF8Data"TimeZoneAliases.txt"); while (true) { String line =
109      * in.readLine(); if (line == null) break; line = line.trim(); int pos =
110      * line.indexOf('#'); if (pos >= 0) { skippedAliases.add(line); line =
111      * line.substring(0,pos).trim(); } if (line.length() == 0) continue; List
112      * pieces = Utility.splitList(line,';', true); bogusZones.put(pieces.get(0),
113      * pieces.get(1)); } in.close(); } catch (IOException e) { throw new
114      * IllegalArgumentException("Can't find timezone aliases"); } } return
115      * bogusZones; }
116      */
117 
118     Map<String, List<String>> zoneData;
119 
120     Set<String> skippedAliases = new TreeSet<>();
121 
122     /*
123      * # This file contains a table with the following columns: # 1. ISO 3166
124      * 2-character country code. See the file `iso3166.tab'. # 2. Latitude and
125      * longitude of the zone's principal location # in ISO 6709
126      * sign-degrees-minutes-seconds format, # either +-DDMM+-DDDMM or
127      * +-DDMMSS+-DDDMMSS, # first latitude (+ is north), then longitude (+ is
128      * east). # 3. Zone name used in value of TZ environment variable. # 4.
129      * Comments; present if and only if the country has multiple rows. # # Columns
130      * are separated by a single tab.
131      */
parseYear(String year, int defaultValue)132     static int parseYear(String year, int defaultValue) {
133         if ("only".startsWith(year))
134             return defaultValue;
135         if ("minimum".startsWith(year))
136             return Integer.MIN_VALUE;
137         if ("maximum".startsWith(year))
138             return Integer.MAX_VALUE;
139         return Integer.parseInt(year);
140     }
141 
142     public static class Time {
143         public int seconds;
144         public byte type;
145         static final byte WALL = 0, STANDARD = 1, UNIVERSAL = 2;
146 
Time(String in)147         Time(String in) {
148             if (in.equals("-")) return; // zero/WALL is the default
149             char suffix = in.charAt(in.length() - 1);
150             switch (suffix) {
151             case 'w':
152                 in = in.substring(0, in.length() - 1);
153                 break;
154             case 's':
155                 in = in.substring(0, in.length() - 1);
156                 type = STANDARD;
157                 break;
158             case 'u':
159             case 'g':
160             case 'z':
161                 in = in.substring(0, in.length() - 1);
162                 type = UNIVERSAL;
163                 break;
164             }
165             seconds = parseSeconds(in, false);
166         }
167 
parseSeconds(String in, boolean allowNegative)168         public static int parseSeconds(String in, boolean allowNegative) {
169             boolean negative = false;
170             if (in.startsWith("-")) {
171                 assert (allowNegative);
172                 negative = true;
173                 in = in.substring(1);
174             }
175             String[] pieces = in.split(":");
176             int multiplier = 3600;
177             int result = 0;
178             for (int i = 0; i < pieces.length; ++i) {
179                 result += multiplier * Integer.parseInt(pieces[i]);
180                 multiplier /= 60;
181                 assert (multiplier >= 0);
182             }
183             if (negative) result = -result;
184             return result;
185         }
186 
187         @Override
toString()188         public String toString() {
189             return BoilerplateUtilities.toStringHelper(this);
190         }
191     }
192 
193     static final String[] months = { "january", "february", "march", "april", "may", "june", "july", "august",
194         "september", "october", "november", "december" };
195     static final String[] weekdays = { "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday" };
196 
findStartsWith(String value, String[] array, boolean exact)197     static int findStartsWith(String value, String[] array, boolean exact) {
198         value = value.toLowerCase(Locale.ENGLISH);
199         for (int i = 0; i < array.length; ++i) {
200             if (array[i].startsWith(value)) return i;
201         }
202         throw new IllegalArgumentException("Can't find " + value + " in " + Arrays.asList(months));
203     }
204 
205     static Pattern dayPattern = PatternCache.get("([0-9]+)|(last)([a-z]+)|([a-z]+)([<=>]+)([0-9]+)");
206     static final String[] relations = { "<=", ">=" };
207 
208     public static class Day implements Comparable<Object> {
209         public int number;
210         public byte relation;
211         public int weekDay;
212         static final byte NONE = 0, LEQ = 2, GEQ = 4;
213 
Day(String value)214         Day(String value) {
215             value = value.toLowerCase();
216             Matcher matcher = dayPattern.matcher(value);
217             if (!matcher.matches()) {
218                 throw new IllegalArgumentException();
219             }
220             if (matcher.group(1) != null) {
221                 number = Integer.parseInt(matcher.group(1));
222                 return;
223             }
224             if (matcher.group(2) != null) {
225                 weekDay = findStartsWith(matcher.group(3), weekdays, false);
226                 number = 31;
227                 relation = LEQ;
228                 return;
229             }
230             if (matcher.group(4) != null) {
231                 weekDay = findStartsWith(matcher.group(4), weekdays, false);
232                 relation = (byte) findStartsWith(matcher.group(5), relations, false);
233                 number = Integer.parseInt(matcher.group(6));
234                 return;
235             }
236             throw new IllegalArgumentException();
237         }
238 
239         @Override
toString()240         public String toString() {
241             return BoilerplateUtilities.toStringHelper(this);
242         }
243 
244         @Override
compareTo(Object other)245         public int compareTo(Object other) {
246             return toString().compareTo(other.toString());
247         }
248     }
249 
250     /**
251      *
252      A rule line has the form
253      *
254      * Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
255      *
256      * For example:
257      *
258      * Rule US 1967 1973 - Apr lastSun 2:00 1:00 D
259      *
260      * The fields that make up a rule line are:
261      *
262      * NAME Gives the (arbitrary) name of the set of rules this
263      * rule is part of.
264      *
265      * FROM Gives the first year in which the rule applies. Any
266      * integer year can be supplied; the Gregorian calendar
267      * is assumed. The word minimum (or an abbreviation)
268      * means the minimum year representable as an integer.
269      * The word maximum (or an abbreviation) means the
270      * maximum year representable as an integer. Rules can
271      * describe times that are not representable as time
272      * values, with the unrepresentable times ignored; this
273      * allows rules to be portable among hosts with
274      * differing time value types.
275      *
276      * TO Gives the final year in which the rule applies. In
277      * addition to minimum and maximum (as above), the word
278      * only (or an abbreviation) may be used to repeat the
279      * value of the FROM field.
280      *
281      * TYPE Gives the type of year in which the rule applies.
282      * If TYPE is - then the rule applies in all years
283      * between FROM and TO inclusive. If TYPE is something
284      * else, then zic executes the command
285      * yearistype year type
286      * to check the type of a year: an exit status of zero
287      * is taken to mean that the year is of the given type;
288      * an exit status of one is taken to mean that the year
289      * is not of the given type.
290      *
291      * IN Names the month in which the rule takes effect.
292      * Month names may be abbreviated.
293      *
294      * ON Gives the day on which the rule takes effect.
295      * Recognized forms include:
296      *
297      * 5 the fifth of the month
298      * lastSun the last Sunday in the month
299      * lastMon the last Monday in the month
300      * Sun>=8 first Sunday on or after the eighth
301      * Sun<=25 last Sunday on or before the 25th
302      *
303      * Names of days of the week may be abbreviated or
304      * spelled out in full. Note that there must be no
305      * spaces within the ON field.
306      *
307      * AT Gives the time of day at which the rule takes
308      * effect. Recognized forms include:
309      *
310      * 2 time in hours
311      * 2:00 time in hours and minutes
312      * 15:00 24-hour format time (for times after noon)
313      * 1:28:14 time in hours, minutes, and seconds
314      * - equivalent to 0
315      *
316      * where hour 0 is midnight at the start of the day,
317      * and hour 24 is midnight at the end of the day. Any
318      * of these forms may be followed by the letter w if
319      * the given time is local "wall clock" time, s if the
320      * given time is local "standard" time, or u (or g or
321      * z) if the given time is universal time; in the
322      * absence of an indicator, wall clock time is assumed.
323      *** cannot be negative
324      *
325      * SAVE Gives the amount of time to be added to local
326      * standard time when the rule is in effect. This
327      * field has the same format as the AT field (although,
328      * of course, the w and s suffixes are not used).
329      *** can be positive or negative
330      *
331      * LETTER/S
332      * Gives the "variable part" (for example, the "S" or
333      * "D" in "EST" or "EDT") of time zone abbreviations to
334      * be used when this rule is in effect. If this field
335      * is -, the variable part is null.
336      *
337      *
338      *
339      */
340 
341     public static class RuleLine {
342         public static Set<String> types = new TreeSet<>();
343         public static Set<Day> days = new TreeSet<>();
344         static Set<Integer> saves = new TreeSet<>();
345 
RuleLine(List<String> l)346         RuleLine(List<String> l) {
347             fromYear = parseYear(l.get(0), 0);
348             toYear = parseYear(l.get(1), fromYear);
349             type = l.get(2);
350             if (type.equals("-")) type = null;
351             month = 1 + findStartsWith(l.get(3), months, false);
352             day = new Day(l.get(4));
353             time = new Time(l.get(5));
354             save = Time.parseSeconds(l.get(6), true);
355             letter = l.get(7);
356             if (letter.equals("-")) letter = null;
357             if (type != null) types.add(type);
358             days.add(day);
359         }
360 
361         @Override
toString()362         public String toString() {
363             return BoilerplateUtilities.toStringHelper(this);
364         }
365 
366         public int fromYear;
367 
368         public int toYear;
369 
370         public String type;
371 
372         public int month;
373 
374         public Day day;
375 
376         public Time time;
377 
378         public int save;
379 
380         public String letter;
381 
382         public static final int FIELD_COUNT = 8; // excluding Rule, Name
383     }
384 
385     /**
386      * A zone line has the form
387      *
388      * Zone NAME GMTOFF RULES/SAVE FORMAT [UNTIL]
389      *
390      * For example:
391      *
392      * Zone Australia/Adelaide 9:30 Aus CST 1971 Oct 31 2:00
393      *
394      * The fields that make up a zone line are:
395      *
396      * NAME The name of the time zone. This is the name used in
397      * creating the time conversion information file for the
398      * zone.
399      *
400      * GMTOFF
401      * The amount of time to add to UTC to get standard time
402      * in this zone. This field has the same format as the
403      * AT and SAVE fields of rule lines; begin the field with
404      * a minus sign if time must be subtracted from UTC.
405      *
406      * RULES/SAVE
407      * The name of the rule(s) that apply in the time zone
408      * or, alternately, an amount of time to add to local
409      * standard time. If this field is - then standard time
410      * always applies in the time zone.
411      *
412      * FORMAT
413      * The format for time zone abbreviations in this time
414      * zone. The pair of characters %s is used to show where
415      * the "variable part" of the time zone abbreviation
416      * goes. Alternately, a slash (/) separates standard and
417      * daylight abbreviations.
418      *
419      * UNTIL The time at which the UTC offset or the rule(s) change
420      * for a location. It is specified as a year, a month, a
421      * day, and a time of day. If this is specified, the
422      * time zone information is generated from the given UTC
423      * offset and rule change until the time specified. The
424      * month, day, and time of day have the same format as
425      * the IN, ON, and AT columns of a rule; trailing columns
426      * can be omitted, and default to the earliest possible
427      * value for the missing columns.
428      *
429      * The next line must be a "continuation" line; this has
430      * the same form as a zone line except that the string
431      * "Zone" and the name are omitted, as the continuation
432      * line will place information starting at the time
433      * specified as the UNTIL field in the previous line in
434      * the file used by the previous line. Continuation
435      * lines may contain an UNTIL field, just as zone lines
436      * do, indicating that the next line is a further
437      * continuation.
438      */
439     public static class ZoneLine {
440         public static Set<Day> untilDays = new TreeSet<>();
441         public static Set<String> rulesSaves = new TreeSet<>();
442 
ZoneLine(List<String> l)443         ZoneLine(List<String> l) {
444             gmtOff = Time.parseSeconds(l.get(0), true);
445             rulesSave = l.get(1);
446             if (rulesSave.equals("-"))
447                 rulesSave = "0";
448             else if (rulesSave.charAt(0) < 'A') rulesSave = "" + Time.parseSeconds(rulesSave, false);
449 
450             format = l.get(2);
451             switch (l.size()) {
452             case 7:
453                 untilTime = new Time(l.get(6)); // fall through
454             case 6:
455                 untilDay = new Day(l.get(5)); // fall through
456                 untilDays.add(untilDay);
457             case 5:
458                 untilMonth = 1 + findStartsWith(l.get(4), months, false); // fall through
459             case 4:
460                 untilYear = parseYear(l.get(3), Integer.MAX_VALUE); // fall through
461             case 3:
462                 break; // ok
463             default:
464                 throw new IllegalArgumentException("Wrong field count: " + l);
465             }
466             rulesSaves.add(rulesSave);
467         }
468 
469         @Override
toString()470         public String toString() {
471             return BoilerplateUtilities.toStringHelper(this);
472         }
473 
474         public int gmtOff;
475 
476         public String rulesSave;
477 
478         public String format;
479 
480         public int untilYear = Integer.MAX_VALUE; // indicating continuation
481 
482         public int untilMonth;
483 
484         public Day untilDay;
485 
486         public Time untilTime;
487 
488         public String comment;
489 
490         public static final int FIELD_COUNT = 3; // excluding Zone, Name
491 
492         public static final int FIELD_COUNT_UNTIL = 7; // excluding Zone, Name
493     }
494 
495     Map<String, List<RuleLine>> ruleID_rules = new TreeMap<>();
496 
497     Map<String, List<ZoneLine>> zone_rules = new TreeMap<>();
498 
499     Map<String, String> linkold_new = new TreeMap<>();
500 
501     Map<String, Set<String>> linkNew_oldSet = new TreeMap<>();
502 
/**
 * Plain holder for one transition: a date, an offset and an abbreviation.
 * NOTE(review): the units of {@code date} and {@code offset} are not
 * established anywhere in the visible code; confirm before relying on them.
 */
public class Transition {
    public long date;
    public long offset;
    public String abbreviation;
}
508 
509     public class TransitionList {
510 
addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear)511         void addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear) {
512             // add everything between the zonelines
513             if (lastZoneLine == null) {
514                 return;
515             }
516             startYear = Math.max(startYear, lastZoneLine.untilYear);
517             endYear = Math.min(endYear, zoneLine.untilYear);
518             int gmtOffset = lastZoneLine.gmtOff;
519             for (int year = startYear; year <= endYear; ++year) {
520                 resolveTime(gmtOffset, lastZoneLine.untilYear, lastZoneLine.untilMonth,
521                     lastZoneLine.untilDay, lastZoneLine.untilTime);
522             }
523         }
524 
resolveTime(int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime)525         private long resolveTime(int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime) {
526             return 0;
527         }
528     }
529 
getTransitions(String zoneID, int startYear, int endYear)530     public TransitionList getTransitions(String zoneID, int startYear, int endYear) {
531         TransitionList results = new TransitionList();
532         List<ZoneLine> rules = zone_rules.get(zoneID);
533         ZoneLine lastZoneLine = null;
534         for (ZoneLine zoneLine : rules) {
535             results.addTransitions(lastZoneLine, zoneLine, startYear, endYear);
536             lastZoneLine = zoneLine;
537         }
538         return results;
539     }
540 
getTZIDComparator()541     public Comparator<String> getTZIDComparator() {
542         return TZIDComparator;
543     }
544 
545     private static List<String> errorData = Arrays.asList(new String[] {
546         new Double(Double.MIN_VALUE).toString(), new Double(Double.MIN_VALUE).toString(), "" });
547 
548     private Comparator<String> TZIDComparator = new Comparator<>() {
549         Map<String, List<String>> data = getZoneData();
550 
551         @Override
552         public int compare(String s1, String s2) {
553             List<String> data1 = getData(s1);
554             List<String> data2 = getData(s2);
555             int result;
556             // country
557             String country1 = data1.get(2);
558             String country2 = data2.get(2);
559 
560             if ((result = country1.compareTo(country2)) != 0)
561                 return result;
562             // longitude
563             Double d1 = Double.valueOf(data1.get(1));
564             Double d2 = Double.valueOf(data2.get(1));
565             if ((result = d1.compareTo(d2)) != 0)
566                 return result;
567             // latitude
568             d1 = Double.valueOf(data1.get(0));
569             d2 = Double.valueOf(data2.get(0));
570             if ((result = d1.compareTo(d2)) != 0)
571                 return result;
572             // name
573             return s1.compareTo(s2);
574         }
575 
576         /**
577          * Get timezone data for the given location
578          * Include work-arounds for missing time zones
579          *
580          * @param s the string like "Australia/Currie"
581          * @return a list of 4 strings for latitude, longitude, country, city
582          *
583          * Reference: https://unicode-org.atlassian.net/browse/CLDR-14428
584          */
585         private List<String> getData(String s) {
586             List<String> d = data.get(s);
587             if (d == null) {
588                 String sNew = linkold_new.get(s);
589                 if (sNew != null) {
590                     d = data.get(sNew);
591                 }
592                 if (d == null) {
593                     d = errorData;
594                 }
595             }
596             return d;
597         }
598     };
599 
/**
 * Comparator over the leading region component of zone IDs.
 * NOTE(review): presumably the registration order below is the resulting
 * sort order; confirm against MapComparator.
 */
public static MapComparator<String> regionalCompare = new MapComparator<>();
static {
    final String[] regionsInOrder = {
        "America", "Atlantic", "Europe", "Africa", "Asia", "Indian",
        "Australia", "Pacific", "Arctic", "Antarctica", "Etc"
    };
    for (String region : regionsInOrder) {
        regionalCompare.add(region);
    }
}
614 
615     private static String[] TZFiles = { "africa", "antarctica", "asia",
616         "australasia", "backward", "etcetera", "europe", "northamerica",
617         "southamerica" };
618 
619     private static Map<String, String> FIX_UNSTABLE_TZIDS;
620 
621     private static Set<String> SKIP_LINKS = new HashSet<>(Arrays.asList(
622         new String[] {
623             "America/Montreal", "America/Toronto",
624             "America/Santa_Isabel", "America/Tijuana" }));
625 
626     private static Set<String> PREFERRED_BASES = new HashSet<>(Arrays.asList(new String[] { "Europe/London" }));
627 
628     private static String[][] ADD_ZONE_ALIASES_DATA = {
629         { "Etc/UCT", "Etc/UTC" },
630 
631         { "EST", "Etc/GMT+5" },
632         { "MST", "Etc/GMT+7" },
633         { "HST", "Etc/GMT+10" },
634 
635         { "SystemV/AST4", "Etc/GMT+4" },
636         { "SystemV/EST5", "Etc/GMT+5" },
637         { "SystemV/CST6", "Etc/GMT+6" },
638         { "SystemV/MST7", "Etc/GMT+7" },
639         { "SystemV/PST8", "Etc/GMT+8" },
640         { "SystemV/YST9", "Etc/GMT+9" },
641         { "SystemV/HST10", "Etc/GMT+10" },
642     };
643 
644     static String[] FIX_DEPRECATED_ZONE_DATA = {
645         "Africa/Timbuktu",
646         "America/Argentina/ComodRivadavia",
647         "America/Santa_Isabel",
648         "Europe/Belfast",
649         "Pacific/Yap",
650         "Antarctica/South_Pole",
651         "America/Shiprock",
652         "America/Montreal",
653         "Asia/Chongqing",
654         "Asia/Harbin",
655         "Asia/Kashgar"
656     };
657     static {
658         // The format is <new name>, <old name>
659         String[][] FIX_UNSTABLE_TZID_DATA = new String[][] {
660             { "America/Atikokan", "America/Coral_Harbour" },
661             { "America/Argentina/Buenos_Aires", "America/Buenos_Aires" },
662             { "America/Argentina/Catamarca", "America/Catamarca" },
663             { "America/Argentina/Cordoba", "America/Cordoba" },
664             { "America/Argentina/Jujuy", "America/Jujuy" },
665             { "America/Argentina/Mendoza", "America/Mendoza" },
666             { "America/Nuuk", "America/Godthab" },
667             { "America/Kentucky/Louisville", "America/Louisville" },
668             { "America/Indiana/Indianapolis", "America/Indianapolis" },
669             { "Africa/Asmara", "Africa/Asmera" },
670             { "Atlantic/Faroe", "Atlantic/Faeroe" },
671             { "Asia/Kolkata", "Asia/Calcutta" },
672             { "Asia/Ho_Chi_Minh", "Asia/Saigon" },
673             { "Asia/Yangon", "Asia/Rangoon" },
674             { "Asia/Kathmandu", "Asia/Katmandu" },
675             { "Europe/Kyiv", "Europe/Kiev" },
676             { "Pacific/Pohnpei", "Pacific/Ponape" },
677             { "Pacific/Chuuk", "Pacific/Truk" },
678             { "Pacific/Honolulu", "Pacific/Johnston" }
679         };
680         FIX_UNSTABLE_TZIDS = CldrUtility.asMap(FIX_UNSTABLE_TZID_DATA);
681     }
682 
683     // CLDR canonical zone IDs removed from zone.tab are defined here.
684     // When these zones are deprecated in CLDR, remove them from this array.
685     // See CLDR-16049
686     static final String[][] SUPPLEMENTAL_ZONE_ID_DATA = {
687         {"Europe/Uzhgorod", "UA", "+4837+02218"},       // 2022d
688         {"Europe/Zaporozhye", "UA", "+4750+03510"},     // 2022d
689         {"America/Nipigon", "CA", "+4901-08816"},       // 2022f
690         {"America/Rainy_River", "CA", "+4843-09434"},   // 2022f
691         {"America/Thunder_Bay", "CA", "+4823-08915"},   // 2022f
692         {"America/Pangnirtung", "CA", "+6608-06544"},   // 2022g
693     };
694 
695     /**
696      *
697      */
makeZoneData()698     private void makeZoneData() {
699         try {
700             // get version
701             BufferedReader versionIn = CldrUtility.getUTF8Data("tzdb-version.txt");
702             version = versionIn.readLine();
703             if (!version.matches("[0-9]{4}[a-z]")) {
704                 throw new IllegalArgumentException(String.format("Bad Version number: %s, should be of the form 2007x",
705                     version));
706             }
707             versionIn.close();
708 
709             // String deg = "([+-][0-9]+)";//
710             String deg = "([+-])([0-9][0-9][0-9]?)([0-9][0-9])([0-9][0-9])?";//
711             Matcher m = PatternCache.get(deg + deg).matcher("");
712             zoneData = new TreeMap<>();
713             BufferedReader in = CldrUtility.getUTF8Data("zone.tab");
714             while (true) {
715                 String line = in.readLine();
716                 if (line == null)
717                     break;
718                 line = line.trim();
719                 int pos = line.indexOf('#');
720                 if (pos >= 0) {
721                     skippedAliases.add(line);
722                     line = line.substring(0, pos).trim();
723                 }
724                 if (line.length() == 0)
725                     continue;
726                 List<String> pieces = CldrUtility.splitList(line, '\t', true);
727                 String country = pieces.get(0);
728                 String latLong = pieces.get(1);
729                 String tzid = pieces.get(2);
730                 String ntzid = FIX_UNSTABLE_TZIDS.get(tzid);
731                 if (ntzid != null)
732                     tzid = ntzid;
733                 String comment = pieces.size() < 4 ? null : (String) pieces.get(3);
734                 pieces.clear();
735                 if (!m.reset(latLong).matches())
736                     throw new IllegalArgumentException("Bad zone.tab, lat/long format: "
737                         + line);
738 
739                 pieces.add(getDegrees(m, true).toString());
740                 pieces.add(getDegrees(m, false).toString());
741                 pieces.add(country);
742                 if (comment != null)
743                     pieces.add(comment);
744                 if (zoneData.containsKey(tzid))
745                     throw new IllegalArgumentException("Bad zone.tab, duplicate entry: "
746                         + line);
747                 zoneData.put(tzid, pieces);
748             }
749             in.close();
750             // add Etcs
751             for (int i = -14; i <= 12; ++i) {
752                 List<String> pieces = new ArrayList<>();
753                 int latitude = 0;
754                 int longitude = i * 15;
755                 if (longitude <= -180) {
756                     longitude += 360;
757                 }
758                 pieces.add(new Double(latitude).toString()); // lat
759                 // remember that the sign of the TZIDs is wrong
760                 pieces.add(new Double(-longitude).toString()); // long
761                 pieces.add(StandardCodes.NO_COUNTRY); // country
762 
763                 zoneData.put("Etc/GMT" + (i == 0 ? "" : i < 0 ? "" + i : "+" + i),
764                     pieces);
765             }
766             // add Unknown / UTC
767             List<String> pieces = new ArrayList<>();
768             pieces.add(new Double(0).toString()); // lat
769             pieces.add(new Double(0).toString()); // long
770             pieces.add(StandardCodes.NO_COUNTRY); // country
771             zoneData.put("Etc/Unknown", pieces);
772             zoneData.put("Etc/UTC", pieces);
773 
774             // add extra zones
775             for (String[] zoneEntry : SUPPLEMENTAL_ZONE_ID_DATA) {
776                 List<String> zarray = new ArrayList<>();
777                 if (!m.reset(zoneEntry[2]).matches()) {
778                     throw new IllegalArgumentException("Bad zone.tab, lat/long format: " + zoneEntry[2]);
779                 }
780                 zarray.add(getDegrees(m, true).toString());
781                 zarray.add(getDegrees(m, false).toString());
782                 zarray.add(zoneEntry[1]);
783                 zoneData.put(zoneEntry[0], zarray);
784             }
785 
786             zoneData = CldrUtility.protectCollection(zoneData); // protect for later
787 
788             // now get links
789             Pattern whitespace = PatternCache.get("\\s+");
790             XEquivalenceClass<String, String> linkedItems = new XEquivalenceClass<>("None");
791             for (int i = 0; i < TZFiles.length; ++i) {
792                 in = CldrUtility.getUTF8Data(TZFiles[i]);
793                 String zoneID = null;
794                 while (true) {
795                     String line = in.readLine();
796                     if (line == null)
797                         break;
798                     String originalLine = line;
799                     int commentPos = line.indexOf("#");
800                     String comment = null;
801                     if (commentPos >= 0) {
802                         comment = line.substring(commentPos + 1).trim();
803                         line = line.substring(0, commentPos);
804                     }
805                     line = line.trim();
806                     if (line.length() == 0)
807                         continue;
808                     String[] items = whitespace.split(line);
809                     if (zoneID != null || items[0].equals("Zone")) {
810                         List<String> l = new ArrayList<>();
811                         l.addAll(Arrays.asList(items));
812 
813                         // Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01
814                         // 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
815                         if (zoneID == null) {
816                             l.remove(0); // "Zone"
817                             zoneID = l.get(0);
818                             String ntzid = FIX_UNSTABLE_TZIDS.get(zoneID);
819                             if (ntzid != null)
820                                 zoneID = ntzid;
821                             l.remove(0);
822                         }
823                         List<ZoneLine> zoneRules = zone_rules.get(zoneID);
824                         if (zoneRules == null) {
825                             zoneRules = new ArrayList<>();
826                             zone_rules.put(zoneID, zoneRules);
827                         }
828 
829                         if (l.size() < ZoneLine.FIELD_COUNT
830                             || l.size() > ZoneLine.FIELD_COUNT_UNTIL) {
831                             System.out.println("***Zone incorrect field count:");
832                             System.out.println(l);
833                             System.out.println(originalLine);
834                         }
835 
836                         ZoneLine zoneLine = new ZoneLine(l);
837                         zoneLine.comment = comment;
838                         zoneRules.add(zoneLine);
839                         if (l.size() == ZoneLine.FIELD_COUNT) {
840                             zoneID = null; // no continuation line
841                         }
842                     } else if (items[0].equals("Rule")) {
843                         // # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
844                         // Rule Algeria 1916 only - Jun 14 23:00s 1:00 S
845 
846                         String ruleID = items[1];
847                         List<RuleLine> ruleList = ruleID_rules.get(ruleID);
848                         if (ruleList == null) {
849                             ruleList = new ArrayList<>();
850                             ruleID_rules.put(ruleID, ruleList);
851                         }
852                         List<String> l = new ArrayList<>();
853                         l.addAll(Arrays.asList(items));
854                         l.remove(0);
855                         l.remove(0);
856                         if (l.size() != RuleLine.FIELD_COUNT) {
857                             System.out.println("***Rule incorrect field count:");
858                             System.out.println(l);
859                         }
860                         if (comment != null)
861                             l.add(comment);
862                         RuleLine ruleLine = new RuleLine(l);
863                         ruleList.add(ruleLine);
864 
865                     } else if (items[0].equals("Link")) {
866                         String old = items[2];
867                         String newOne = items[1];
868                         if (!(SKIP_LINKS.contains(old) && SKIP_LINKS.contains(newOne))) {
869                             //System.out.println("Original " + old + "\t=>\t" + newOne);
870                             linkedItems.add(old, newOne);
871                         }
872                         /*
873                          * String conflict = (String) linkold_new.get(old); if (conflict !=
874                          * null) { System.out.println("Conflict with old: " + old + " => " +
875                          * conflict + ", " + newOne); } System.out.println(old + "\t=>\t" +
876                          * newOne); linkold_new.put(old, newOne);
877                          */
878                     } else {
879                         if (DEBUG)
880                             System.out.println("Unknown zone line: " + line);
881                     }
882                 }
883                 in.close();
884             }
885             // add in stuff that should be links
886             for (int i = 0; i < ADD_ZONE_ALIASES_DATA.length; ++i) {
887                 linkedItems.add(ADD_ZONE_ALIASES_DATA[i][0],
888                     ADD_ZONE_ALIASES_DATA[i][1]);
889             }
890 
891             Set<String> isCanonical = zoneData.keySet();
892 
893             // walk through the sets, and
894             // if any set contains two canonical items, split it.
895             // if any contains one, make it the primary
896             // if any contains zero, problem!
897             for (Set<String> equivalents : linkedItems.getEquivalenceSets()) {
898                 Set<String> canonicals = new TreeSet<>(equivalents);
899                 canonicals.retainAll(isCanonical);
900                 if (canonicals.size() == 0)
901                     throw new IllegalArgumentException("No canonicals in: " + equivalents);
902                 if (canonicals.size() > 1) {
903                     if (DEBUG) {
904                         System.out.println("Too many canonicals in: " + equivalents);
905                         System.out
906                             .println("\t*Don't* put these into the same equivalence class: "
907                                 + canonicals);
908                     }
909                     Set<String> remainder = new TreeSet<>(equivalents);
910                     remainder.removeAll(isCanonical);
911                     if (remainder.size() != 0) {
912                         if (DEBUG) {
913                             System.out
914                                 .println("\tThe following should be equivalent to others: "
915                                     + remainder);
916                         }
917                     }
918                 }
919                 {
920                     String newOne;
921                     // get the item that we want to hang all the aliases off of.
922                     // normally this is the first (alphabetically) one, but
923                     // it may be overridden with PREFERRED_BASES
924                     Set<String> preferredItems = new HashSet<>(PREFERRED_BASES);
925                     preferredItems.retainAll(canonicals);
926                     if (preferredItems.size() > 0) {
927                         newOne = preferredItems.iterator().next();
928                     } else {
929                         newOne = canonicals.iterator().next();
930                     }
931                     for (String oldOne : equivalents) {
932                         if (canonicals.contains(oldOne))
933                             continue;
934                         // System.out.println("Mapping " + oldOne + "\t=>\t" + newOne);
935                         linkold_new.put(oldOne, newOne);
936                     }
937                 }
938             }
939 
940             /*
941              * // fix the links from old to new, to remove chains for (Iterator it =
942              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
943              * it.next(); Object newItem = linkold_new.get(oldItem); while (true) {
944              * Object linkItem = linkold_new.get(newItem); if (linkItem == null)
945              * break; if (true) System.out.println("Connecting link chain: " + oldItem +
946              * "\t=> " + newItem + "\t=> " + linkItem); newItem = linkItem;
947              * linkold_new.put(oldItem, newItem); } }
948              * // reverse the links *from* canonical names for (Iterator it =
949              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
950              * it.next(); if (!isCanonical.contains(oldItem)) continue; Object newItem =
951              * linkold_new.get(oldItem); }
952              *
953              * // fix unstable TZIDs Set itemsToRemove = new HashSet(); Map
954              * itemsToAdd = new HashMap(); for (Iterator it =
955              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
956              * it.next(); Object newItem = linkold_new.get(oldItem); Object modOldItem =
957              * RESTORE_UNSTABLE_TZIDS.get(oldItem); Object modNewItem =
958              * FIX_UNSTABLE_TZIDS.get(newItem); if (modOldItem == null && modNewItem ==
959              * null) continue; if (modOldItem == null) { // just fix old entry
960              * itemsToAdd.put(oldItem, modNewItem); continue; } // otherwise have to
961              * nuke and redo itemsToRemove.add(oldItem); if (modNewItem == null)
962              * modNewItem = newItem; itemsToAdd.put(modOldItem, modNewItem); } // now
963              * make fixes (we couldn't earlier because we were iterating
964              * Utility.removeAll(linkold_new, itemsToRemove);
965              * linkold_new.putAll(itemsToAdd);
966              * // now remove all links that are from canonical zones
967              * Utility.removeAll(linkold_new, zoneData.keySet());
968              */
969 
970             // generate list of new to old
971             for (Iterator<String> it = linkold_new.keySet().iterator(); it.hasNext();) {
972                 String oldZone = it.next();
973                 String newZone = linkold_new.get(oldZone);
974                 Set<String> s = linkNew_oldSet.get(newZone);
975                 if (s == null)
976                     linkNew_oldSet.put(newZone, s = new HashSet<>());
977                 s.add(oldZone);
978             }
979 
980             // PROTECT EVERYTHING
981             linkNew_oldSet = CldrUtility.protectCollection(linkNew_oldSet);
982             linkold_new = CldrUtility.protectCollection(linkold_new);
983             ruleID_rules = CldrUtility.protectCollection(ruleID_rules);
984             zone_rules = CldrUtility.protectCollection(zone_rules);
985             // TODO protect zone info later
986         } catch (IOException e) {
987             throw new ICUUncheckedIOException(
988                 "Can't find timezone aliases: " + e.toString(), e);
989         }
990     }
991 
992     /**
993      * @param m
994      */
995     private Double getDegrees(Matcher m, boolean lat) {
996         int startIndex = lat ? 1 : 5;
997         double amount = Integer.parseInt(m.group(startIndex + 1))
998             + Integer.parseInt(m.group(startIndex + 2)) / 60.0;
999         if (m.group(startIndex + 3) != null)
1000             amount += Integer.parseInt(m.group(startIndex + 3)) / 3600.0;
1001         if (m.group(startIndex).equals("-"))
1002             amount = -amount;
1003         return new Double(amount);
1004     }
1005 
1006     /**
1007      * @return Returns the linkold_new.
1008      */
1009     public Map<String, String> getZoneLinkold_new() {
1010         getZoneData();
1011         return linkold_new;
1012     }
1013 
1014     /**
1015      * @return Returns the linkold_new.
1016      */
1017     public Map<String, Set<String>> getZoneLinkNew_OldSet() {
1018         getZoneData();
1019         return linkNew_oldSet;
1020     }
1021 
1022     /**
1023      * @return Returns the ruleID_rules.
1024      */
1025     public Map<String, List<RuleLine>> getZoneRuleID_rules() {
1026         getZoneData();
1027         return ruleID_rules;
1028     }
1029 
1030     /**
1031      * @return Returns the zone_rules.
1032      */
1033     public Map<String, List<ZoneLine>> getZone_rules() {
1034         getZoneData();
1035         return zone_rules;
1036     }
1037 
1038     public String getVersion() {
1039         return version;
1040     }
1041 
1042 }
1043