1
2 /*
3 **********************************************************************
4 * Copyright (c) 2003-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 **********************************************************************
7 * Author: Alan Liu
8 * Created: July 10 2003
9 * Since: ICU 2.8
10 **********************************************************************
11 */
12 #include "tzfile.h" // from Olson tzcode archive, copied to this dir
13
14 #ifdef WIN32
15
16 #include <windows.h>
17 #undef min // windows.h/STL conflict
18 #undef max // windows.h/STL conflict
19 // "identifier was truncated to 'number' characters" warning
20 #pragma warning(disable: 4786)
21
22 #else
23
24 #include <unistd.h>
25 #include <stdio.h>
26 #include <dirent.h>
27 #include <string.h>
28 #include <sys/stat.h>
29
30 #endif
31
32 #include <algorithm>
33 #include <cassert>
34 #include <ctime>
35 #include <fstream>
36 #include <iomanip>
37 #include <iostream>
38 #include <iterator>
39 #include <limits>
40 #include <map>
41 #include <set>
42 #include <sstream>
43 #include <sstream>
44 #include <stdexcept>
45 #include <string>
46 #include <vector>
47
48 #include "tz2icu.h"
49 #include "unicode/uversion.h"
50
51 #define USE64BITDATA
52
53 using namespace std;
54
55 //--------------------------------------------------------------------
56 // Time utilities
57 //--------------------------------------------------------------------
58
// Length, in seconds, of a common (365-day) and a leap (366-day) year.
const int64_t SECS_PER_YEAR      = (int64_t)365 * 24 * 60 * 60;
const int64_t SECS_PER_LEAP_YEAR = (int64_t)366 * 24 * 60 * 60;

// Smallest value of a signed 32-bit integer, widened to 64 bits.
const int64_t LOWEST_TIME32 = -(int64_t)2147483647 - 1;
62
/**
 * Return true if year y is a leap year under the Gregorian rule:
 * divisible by 4, except centuries, unless divisible by 400.
 */
bool isLeap(int32_t y) {
    if (y % 4 != 0) {
        return false;
    }
    if (y % 100 != 0) {
        return true;
    }
    return y % 400 == 0;
}
66
secsPerYear(int32_t y)67 int64_t secsPerYear(int32_t y) {
68 return isLeap(y) ? SECS_PER_LEAP_YEAR : SECS_PER_YEAR;
69 }
70
71 /**
72 * Given a calendar year, return the GMT epoch seconds for midnight
73 * GMT of January 1 of that year. yearToSeconds(1970) == 0.
74 */
yearToSeconds(int32_t year)75 int64_t yearToSeconds(int32_t year) {
76 // inefficient but foolproof
77 int64_t s = 0;
78 int32_t y = 1970;
79 while (y < year) {
80 s += secsPerYear(y++);
81 }
82 while (y > year) {
83 s -= secsPerYear(--y);
84 }
85 return s;
86 }
87
88 /**
89 * Given 1970 GMT epoch seconds, return the calendar year containing
90 * that time. secondsToYear(0) == 1970.
91 */
secondsToYear(int64_t seconds)92 int32_t secondsToYear(int64_t seconds) {
93 // inefficient but foolproof
94 int32_t y = 1970;
95 int64_t s = 0;
96 if (seconds >= 0) {
97 for (;;) {
98 s += secsPerYear(y++);
99 if (s > seconds) break;
100 }
101 --y;
102 } else {
103 for (;;) {
104 s -= secsPerYear(--y);
105 if (s <= seconds) break;
106 }
107 }
108 return y;
109 }
110
111 //--------------------------------------------------------------------
112 // Types
113 //--------------------------------------------------------------------
114
115 struct FinalZone;
116 struct FinalRule;
117 struct SimplifiedZoneType;
118
// One switch from one ZoneType to another.
// Minimal size = 5 bytes (4+1)
struct Transition {
    int64_t time;  // seconds, 1970 epoch
    int32_t type;  // index into 'ZoneInfo.types' 0..255

    Transition(int64_t _time, int32_t _type)
        : time(_time), type(_type) {}
};
129
130 // A behavior mode (what zic calls a 'type') of a time zone.
131 // Minimal size = 6 bytes (4+1+3bits)
132 // SEE: SimplifiedZoneType
133 struct ZoneType {
134 int64_t rawoffset; // raw seconds offset from GMT
135 int64_t dstoffset; // dst seconds offset from GMT
136
137 // We don't really need any of the following, but they are
138 // retained for possible future use. See SimplifiedZoneType.
139 int32_t abbr; // index into ZoneInfo.abbrs 0..n-1
140 bool isdst;
141 bool isstd;
142 bool isgmt;
143
144 ZoneType(const SimplifiedZoneType&); // used by optimizeTypeList
145
ZoneTypeZoneType146 ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1) {}
147
148 // A restricted equality, of just the raw and dst offset
matchesZoneType149 bool matches(const ZoneType& other) {
150 return rawoffset == other.rawoffset &&
151 dstoffset == other.dstoffset;
152 }
153 };
154
155 // A collection of transitions from one ZoneType to another, together
156 // with a list of the ZoneTypes. A ZoneInfo object may have a long
157 // list of transitions between a smaller list of ZoneTypes.
158 //
159 // This object represents the contents of a single zic-created
160 // zoneinfo file.
161 struct ZoneInfo {
162 vector<Transition> transitions;
163 vector<ZoneType> types;
164 vector<string> abbrs;
165
166 string finalRuleID;
167 int32_t finalOffset;
168 int32_t finalYear; // -1 if none
169
170 // If this is an alias, then all other fields are meaningless, and
171 // this field will point to the "real" zone 0..n-1.
172 int32_t aliasTo; // -1 if this is a "real" zone
173
174 // If there are aliases TO this zone, then the following set will
175 // contain their index numbers (each index >= 0).
176 set<int32_t> aliases;
177
ZoneInfoZoneInfo178 ZoneInfo() : finalYear(-1), aliasTo(-1) {}
179
180 void mergeFinalData(const FinalZone& fz);
181
182 void optimizeTypeList();
183
184 // Set this zone to be an alias TO another zone.
185 void setAliasTo(int32_t index);
186
187 // Clear the list of aliases OF this zone.
188 void clearAliases();
189
190 // Add an alias to the list of aliases OF this zone.
191 void addAlias(int32_t index);
192
193 // Is this an alias to another zone?
isAliasZoneInfo194 bool isAlias() const {
195 return aliasTo >= 0;
196 }
197
198 // Retrieve alias list
getAliasesZoneInfo199 const set<int32_t>& getAliases() const {
200 return aliases;
201 }
202
203 void print(ostream& os, const string& id) const;
204 };
205
clearAliases()206 void ZoneInfo::clearAliases() {
207 assert(aliasTo < 0);
208 aliases.clear();
209 }
210
addAlias(int32_t index)211 void ZoneInfo::addAlias(int32_t index) {
212 assert(aliasTo < 0 && index >= 0 && aliases.find(index) == aliases.end());
213 aliases.insert(index);
214 }
215
setAliasTo(int32_t index)216 void ZoneInfo::setAliasTo(int32_t index) {
217 assert(index >= 0);
218 assert(aliases.size() == 0);
219 aliasTo = index;
220 }
221
222 typedef map<string, ZoneInfo> ZoneMap;
223
224 typedef ZoneMap::const_iterator ZoneMapIter;
225
226 //--------------------------------------------------------------------
227 // ZONEINFO
228 //--------------------------------------------------------------------
229
230 // Global map holding all our ZoneInfo objects, indexed by id.
231 ZoneMap ZONEINFO;
232
233 //--------------------------------------------------------------------
234 // zoneinfo file parsing
235 //--------------------------------------------------------------------
236
// Read a zic-coded 32-bit integer from file: four bytes, most
// significant first, interpreted as a two's-complement signed value
// and widened to int64_t.
//
// @param file  stream to read from
// @param minv  smallest acceptable value (inclusive)
// @param maxv  largest acceptable value (inclusive)
// @return the decoded value
// @throws out_of_range if the value lies outside [minv, maxv]
//
// The stream state is not checked here.  If the read comes up short,
// the bytes that were not read count as zero and the stream's fail
// state is left for the caller to inspect.
int64_t readcoded(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(),
                  int64_t maxv=numeric_limits<int64_t>::max()) {
    unsigned char buf[4] = {0, 0, 0, 0}; // must be UNSIGNED
    file.read((char*)buf, 4);

    // Assemble in an unsigned accumulator: shifting a byte with its
    // high bit set into bit 31 of a signed int would overflow.
    uint32_t bits = 0;
    for (int32_t i = 0; i < 4; ++i) {
        bits = (bits << 8) | buf[i];
    }

    // Reinterpret the 32-bit pattern as signed using plain arithmetic.
    int64_t val = (int64_t)bits;
    if (bits & 0x80000000u) {
        val -= ((int64_t)1 << 32);
    }

    if (val < minv || val > maxv) {
        ostringstream os;
        os << "coded value out-of-range: " << val << ", expected ["
           << minv << ", " << maxv << "]";
        throw out_of_range(os.str());
    }
    return val;
}
254
// Read a zic-coded 64-bit integer from file: eight bytes, most
// significant first, interpreted as a two's-complement signed value.
//
// @param file  stream to read from
// @param minv  smallest acceptable value (inclusive)
// @param maxv  largest acceptable value (inclusive)
// @return the decoded value
// @throws out_of_range if the value lies outside [minv, maxv]
//
// The stream state is not checked here.  If the read comes up short,
// the bytes that were not read count as zero and the stream's fail
// state is left for the caller to inspect.
int64_t readcoded64(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(),
                    int64_t maxv=numeric_limits<int64_t>::max()) {
    unsigned char buf[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // must be UNSIGNED
    file.read((char*)buf, 8);

    // Assemble in an unsigned accumulator: shifting a byte with its
    // high bit set into bit 63 of a signed value would overflow.
    uint64_t bits = 0;
    for (int32_t i = 0; i < 8; ++i) {
        bits = (bits << 8) | buf[i];
    }

    // Reinterpret the 64-bit pattern as signed using plain arithmetic:
    // for a pattern with the top bit set, value == -(~bits) - 1.
    int64_t val;
    if (bits >> 63) {
        val = -(int64_t)(~bits) - 1;
    } else {
        val = (int64_t)bits;
    }

    if (val < minv || val > maxv) {
        ostringstream os;
        os << "coded value out-of-range: " << val << ", expected ["
           << minv << ", " << maxv << "]";
        throw out_of_range(os.str());
    }
    return val;
}
272
// Read a boolean value: a single byte that must be 0 or 1.
//
// @param file  stream to read from
// @return true for 1, false for 0
// @throws out_of_range if the byte is neither 0 nor 1
//
// The stream state is not checked here.  If no byte could be read the
// result is false and the stream's fail state is left for the caller
// to inspect.
bool readbool(ifstream& file) {
    char c = 0; // defined value even when the read fails
    file.read(&c, 1);
    if (c!=0 && c!=1) {
        ostringstream os;
        os << "boolean value out-of-range: " << (int32_t)c;
        throw out_of_range(os.str());
    }
    return (c!=0);
}
284
285 /**
286 * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo
287 * @param file an already-open file stream
288 */
readzoneinfo(ifstream & file,ZoneInfo & info,bool is64bitData=false)289 void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData=false) {
290 int32_t i;
291
292 // Check for TZ_ICU_MAGIC signature at file start. If we get a
293 // signature mismatch, it means we're trying to read a file which
294 // isn't a ICU-modified-zic-created zoneinfo file. Typically this
295 // means the user is passing in a "normal" zoneinfo directory, or
296 // a zoneinfo directory that is polluted with other files, or that
297 // the user passed in the wrong directory.
298 char buf[32];
299 file.read(buf, 4);
300 if (strncmp(buf, TZ_ICU_MAGIC, 4) != 0) {
301 throw invalid_argument("TZ_ICU_MAGIC signature missing");
302 }
303 // skip additional Olson byte version
304 file.read(buf, 1);
305 // if '\0', we have just one copy of data, if '2', there is additional
306 // 64 bit version at the end.
307 if(buf[0]!=0 && buf[0]!='2') {
308 throw invalid_argument("Bad Olson version info");
309 }
310
311 // Read reserved bytes. The first of these will be a version byte.
312 file.read(buf, 15);
313 if (*(ICUZoneinfoVersion*)&buf != TZ_ICU_VERSION) {
314 throw invalid_argument("File version mismatch");
315 }
316
317 // Read array sizes
318 int64_t isgmtcnt = readcoded(file, 0);
319 int64_t isdstcnt = readcoded(file, 0);
320 int64_t leapcnt = readcoded(file, 0);
321 int64_t timecnt = readcoded(file, 0);
322 int64_t typecnt = readcoded(file, 0);
323 int64_t charcnt = readcoded(file, 0);
324
325 // Confirm sizes that we assume to be equal. These assumptions
326 // are drawn from a reading of the zic source (2003a), so they
327 // should hold unless the zic source changes.
328 if (isgmtcnt != typecnt || isdstcnt != typecnt) {
329 throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisdstcnt, tth_typecnt");
330 }
331
332 // Used temporarily to store transition times and types. We need
333 // to do this because the times and types are stored in two
334 // separate arrays.
335 vector<int64_t> transitionTimes(timecnt, -1); // temporary
336 vector<int32_t> transitionTypes(timecnt, -1); // temporary
337
338 // Read transition times
339 for (i=0; i<timecnt; ++i) {
340 if (is64bitData) {
341 transitionTimes[i] = readcoded64(file);
342 } else {
343 transitionTimes[i] = readcoded(file);
344 }
345 }
346
347 // Read transition types
348 for (i=0; i<timecnt; ++i) {
349 unsigned char c;
350 file.read((char*) &c, 1);
351 int32_t t = (int32_t) c;
352 if (t < 0 || t >= typecnt) {
353 ostringstream os;
354 os << "illegal type: " << t << ", expected [0, " << (typecnt-1) << "]";
355 throw out_of_range(os.str());
356 }
357 transitionTypes[i] = t;
358 }
359
360 // Build transitions vector out of corresponding times and types.
361 bool insertInitial = false;
362 if (is64bitData) {
363 if (timecnt > 0) {
364 int32_t minidx = -1;
365 for (i=0; i<timecnt; ++i) {
366 if (transitionTimes[i] < LOWEST_TIME32) {
367 if (minidx == -1 || transitionTimes[i] > transitionTimes[minidx]) {
368 // Preserve the latest transition before the 32bit minimum time
369 minidx = i;
370 }
371 } else {
372 info.transitions.push_back(Transition(transitionTimes[i], transitionTypes[i]));
373 }
374 }
375
376 if (minidx != -1) {
377 // If there are any transitions before the 32bit minimum time,
378 // put the type information with the 32bit minimum time
379 vector<Transition>::iterator itr = info.transitions.begin();
380 info.transitions.insert(itr, Transition(LOWEST_TIME32, transitionTypes[minidx]));
381 } else {
382 // Otherwise, we need insert the initial type later
383 insertInitial = true;
384 }
385 }
386 } else {
387 for (i=0; i<timecnt; ++i) {
388 info.transitions.push_back(Transition(transitionTimes[i], transitionTypes[i]));
389 }
390 }
391
392 // Read types (except for the isdst and isgmt flags, which come later (why??))
393 for (i=0; i<typecnt; ++i) {
394 ZoneType type;
395
396 type.rawoffset = readcoded(file);
397 type.dstoffset = readcoded(file);
398 type.isdst = readbool(file);
399
400 unsigned char c;
401 file.read((char*) &c, 1);
402 type.abbr = (int32_t) c;
403
404 if (type.isdst != (type.dstoffset != 0)) {
405 throw invalid_argument("isdst does not reflect dstoffset");
406 }
407
408 info.types.push_back(type);
409 }
410
411 assert(info.types.size() == (unsigned) typecnt);
412
413 if (insertInitial) {
414 assert(timecnt > 0);
415 assert(typecnt > 0);
416
417 int32_t initialTypeIdx = -1;
418
419 // Check if the first type is not dst
420 if (info.types.at(0).dstoffset != 0) {
421 // Initial type's rawoffset is same with the rawoffset after the
422 // first transition, but no DST is observed.
423 int64_t rawoffset0 = (info.types.at(info.transitions.at(0).type)).rawoffset;
424 // Look for matching type
425 for (i=0; i<(int32_t)info.types.size(); ++i) {
426 if (info.types.at(i).rawoffset == rawoffset0
427 && info.types.at(i).dstoffset == 0) {
428 initialTypeIdx = i;
429 break;
430 }
431 }
432 } else {
433 initialTypeIdx = 0;
434 }
435 assert(initialTypeIdx >= 0);
436 // Add the initial type associated with the lowest int32 time
437 vector<Transition>::iterator itr = info.transitions.begin();
438 info.transitions.insert(itr, Transition(LOWEST_TIME32, initialTypeIdx));
439 }
440
441
442 // Read the abbreviation string
443 if (charcnt) {
444 // All abbreviations are concatenated together, with a 0 at
445 // the end of each abbr.
446 char* str = new char[charcnt + 8];
447 file.read(str, charcnt);
448
449 // Split abbreviations apart into individual strings. Record
450 // offset of each abbr in a vector.
451 vector<int32_t> abbroffset;
452 char *limit=str+charcnt;
453 for (char* p=str; p<limit; ++p) {
454 char* start = p;
455 while (*p != 0) ++p;
456 info.abbrs.push_back(string(start, p-start));
457 abbroffset.push_back(start-str);
458 }
459
460 // Remap all the abbrs. Old value is offset into concatenated
461 // raw abbr strings. New value is index into vector of
462 // strings. E.g., 0,5,10,14 => 0,1,2,3.
463
464 // Keep track of which abbreviations get used.
465 vector<bool> abbrseen(abbroffset.size(), false);
466
467 for (vector<ZoneType>::iterator it=info.types.begin();
468 it!=info.types.end();
469 ++it) {
470 vector<int32_t>::const_iterator x=
471 find(abbroffset.begin(), abbroffset.end(), it->abbr);
472 if (x==abbroffset.end()) {
473 // TODO: Modify code to add a new string to the end of
474 // the abbr list when a middle offset is given, e.g.,
475 // "abc*def*" where * == '\0', take offset of 1 and
476 // make the array "abc", "def", "bc", and translate 1
477 // => 2. NOT CRITICAL since we don't even use the
478 // abbr at this time.
479 #if 0
480 // TODO: Re-enable this warning if we start using
481 // the Olson abbr data, or if the above TODO is completed.
482 ostringstream os;
483 os << "Warning: unusual abbr offset " << it->abbr
484 << ", expected one of";
485 for (vector<int32_t>::const_iterator y=abbroffset.begin();
486 y!=abbroffset.end(); ++y) {
487 os << ' ' << *y;
488 }
489 cerr << os.str() << "; using 0" << endl;
490 #endif
491 it->abbr = 0;
492 } else {
493 int32_t index = x - abbroffset.begin();
494 it->abbr = index;
495 abbrseen[index] = true;
496 }
497 }
498
499 for (int32_t ii=0;ii<(int32_t) abbrseen.size();++ii) {
500 if (!abbrseen[ii]) {
501 cerr << "Warning: unused abbreviation: " << ii << endl;
502 }
503 }
504 }
505
506 // Read leap second info, if any.
507 // *** We discard leap second data. ***
508 for (i=0; i<leapcnt; ++i) {
509 readcoded(file); // transition time
510 readcoded(file); // total correction after above
511 }
512
513 // Read isstd flags
514 for (i=0; i<typecnt; ++i) info.types[i].isstd = readbool(file);
515
516 // Read isgmt flags
517 for (i=0; i<typecnt; ++i) info.types[i].isgmt = readbool(file);
518 }
519
520 //--------------------------------------------------------------------
521 // Directory and file reading
522 //--------------------------------------------------------------------
523
524 /**
525 * Process a single zoneinfo file, adding the data to ZONEINFO
526 * @param path the full path to the file, e.g., ".\zoneinfo\America\Los_Angeles"
527 * @param id the zone ID, e.g., "America/Los_Angeles"
528 */
handleFile(string path,string id)529 void handleFile(string path, string id) {
530 // Check for duplicate id
531 if (ZONEINFO.find(id) != ZONEINFO.end()) {
532 ostringstream os;
533 os << "duplicate zone ID: " << id;
534 throw invalid_argument(os.str());
535 }
536
537 ifstream file(path.c_str(), ios::in | ios::binary);
538 if (!file) {
539 throw invalid_argument("can't open file");
540 }
541
542 ZoneInfo info;
543 readzoneinfo(file, info);
544
545 // Check for errors
546 if (!file) {
547 throw invalid_argument("read error");
548 }
549
550 #ifdef USE64BITDATA
551 ZoneInfo info64;
552 readzoneinfo(file, info64, true);
553
554 bool alldone = false;
555 int64_t eofPos = (int64_t) file.tellg();
556
557 // '\n' + <envvar string> + '\n' after the 64bit version data
558 char ch = file.get();
559 if (ch == 0x0a) {
560 bool invalidchar = false;
561 while (file.get(ch)) {
562 if (ch == 0x0a) {
563 break;
564 }
565 if (ch < 0x20) {
566 // must be printable ascii
567 invalidchar = true;
568 break;
569 }
570 }
571 if (!invalidchar) {
572 eofPos = (int64_t) file.tellg();
573 file.seekg(0, ios::end);
574 eofPos = eofPos - (int64_t) file.tellg();
575 if (eofPos == 0) {
576 alldone = true;
577 }
578 }
579 }
580 if (!alldone) {
581 ostringstream os;
582 os << (-eofPos) << " unprocessed bytes at end";
583 throw invalid_argument(os.str());
584 }
585
586 ZONEINFO[id] = info64;
587
588 #else
589 // Check eof-relative pos (there may be a cleaner way to do this)
590 int64_t eofPos = (int64_t) file.tellg();
591 char buf[32];
592 file.read(buf, 4);
593 file.seekg(0, ios::end);
594 eofPos = eofPos - (int64_t) file.tellg();
595 if (eofPos) {
596 // 2006c merged 32 and 64 bit versions in a fat binary
597 // 64 version starts at the end of 32 bit version.
598 // Therefore, if the file is *not* consumed, check
599 // if it is maybe being restarted.
600 if (strncmp(buf, TZ_ICU_MAGIC, 4) != 0) {
601 ostringstream os;
602 os << (-eofPos) << " unprocessed bytes at end";
603 throw invalid_argument(os.str());
604 }
605 }
606 ZONEINFO[id] = info;
607 #endif
608 }
609
610 /**
611 * Recursively scan the given directory, calling handleFile() for each
612 * file in the tree. The user should call with the root directory and
613 * a prefix of "". The function will call itself with non-empty
614 * prefix values.
615 */
616 #ifdef WIN32
617
scandir(string dirname,string prefix="")618 void scandir(string dirname, string prefix="") {
619 HANDLE hList;
620 WIN32_FIND_DATA FileData;
621
622 // Get the first file
623 hList = FindFirstFile((dirname + "\\*").c_str(), &FileData);
624 if (hList == INVALID_HANDLE_VALUE) {
625 cerr << "Error: Invalid directory: " << dirname << endl;
626 exit(1);
627 }
628 for (;;) {
629 string name(FileData.cFileName);
630 string path(dirname + "\\" + name);
631 if (FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
632 if (name != "." && name != "..") {
633 scandir(path, prefix + name + "/");
634 }
635 } else {
636 try {
637 string id = prefix + name;
638 handleFile(path, id);
639 } catch (const exception& e) {
640 cerr << "Error: While processing \"" << path << "\", "
641 << e.what() << endl;
642 exit(1);
643 }
644 }
645
646 if (!FindNextFile(hList, &FileData)) {
647 if (GetLastError() == ERROR_NO_MORE_FILES) {
648 break;
649 } // else...?
650 }
651 }
652 FindClose(hList);
653 }
654
655 #else
656
scandir(string dir,string prefix="")657 void scandir(string dir, string prefix="") {
658 DIR *dp;
659 struct dirent *dir_entry;
660 struct stat stat_info;
661 char pwd[512];
662 vector<string> subdirs;
663 vector<string> subfiles;
664
665 if ((dp = opendir(dir.c_str())) == NULL) {
666 cerr << "Error: Invalid directory: " << dir << endl;
667 exit(1);
668 }
669 if (!getcwd(pwd, sizeof(pwd))) {
670 cerr << "Error: Directory name too long" << endl;
671 exit(1);
672 }
673 chdir(dir.c_str());
674 while ((dir_entry = readdir(dp)) != NULL) {
675 string name = dir_entry->d_name;
676 string path = dir + "/" + name;
677 lstat(dir_entry->d_name,&stat_info);
678 if (S_ISDIR(stat_info.st_mode)) {
679 if (name != "." && name != "..") {
680 subdirs.push_back(path);
681 subdirs.push_back(prefix + name + "/");
682 // scandir(path, prefix + name + "/");
683 }
684 } else {
685 try {
686 string id = prefix + name;
687 subfiles.push_back(path);
688 subfiles.push_back(id);
689 // handleFile(path, id);
690 } catch (const exception& e) {
691 cerr << "Error: While processing \"" << path << "\", "
692 << e.what() << endl;
693 exit(1);
694 }
695 }
696 }
697 closedir(dp);
698 chdir(pwd);
699
700 for(int32_t i=0;i<(int32_t)subfiles.size();i+=2) {
701 try {
702 handleFile(subfiles[i], subfiles[i+1]);
703 } catch (const exception& e) {
704 cerr << "Error: While processing \"" << subfiles[i] << "\", "
705 << e.what() << endl;
706 exit(1);
707 }
708 }
709 for(int32_t i=0;i<(int32_t)subdirs.size();i+=2) {
710 scandir(subdirs[i], subdirs[i+1]);
711 }
712 }
713
714 #endif
715
716 //--------------------------------------------------------------------
717 // Final zone and rule info
718 //--------------------------------------------------------------------
719
/**
 * Skip the remainder of the current line: characters are read and
 * thrown away up to and including the next '\n', or until the stream
 * is exhausted.
 */
void consumeLine(istream& in) {
    for (int32_t ch = in.get(); ch != EOF && ch != '\n'; ch = in.get()) {
        // nothing to do; the character is simply dropped
    }
}
729
// Rule modes, matching the "DOM", "DOWGEQ" and "DOWLEQ" keywords
// accepted by FinalRulePart::set().
enum {
    DOM,     // == 0
    DOWGEQ,  // == 1
    DOWLEQ   // == 2
};

// NOTE(review): presumably indexed by FinalRulePart::timemode()
// (0 = wall, 1 = standard, 2 = GMT) -- confirm at the point of use.
const char* TIME_MODE[] = {"w", "s", "u"};

// February is given 29 days because zic outputs February 29
// for rules like "last Sunday in February".
const int32_t MONTH_LEN[] = {31,29,31,30,31,30,31,31,30,31,30,31};

// Seconds in one hour.
const int32_t HOUR = 60 * 60;
743
744 struct FinalZone {
745 int32_t offset; // raw offset
746 int32_t year; // takes effect for y >= year
747 string ruleid;
748 set<string> aliases;
FinalZoneFinalZone749 FinalZone(int32_t _offset, int32_t _year, const string& _ruleid) :
750 offset(_offset), year(_year), ruleid(_ruleid) {
751 if (offset <= -16*HOUR || offset >= 16*HOUR) {
752 ostringstream os;
753 os << "Invalid input offset " << offset
754 << " for year " << year
755 << " and rule ID " << ruleid;
756 throw invalid_argument(os.str());
757 }
758 if (year < 1900 || year >= 2050) {
759 ostringstream os;
760 os << "Invalid input year " << year
761 << " with offset " << offset
762 << " and rule ID " << ruleid;
763 throw invalid_argument(os.str());
764 }
765 }
FinalZoneFinalZone766 FinalZone() : offset(-1), year(-1) {}
addLinkFinalZone767 void addLink(const string& alias) {
768 if (aliases.find(alias) != aliases.end()) {
769 ostringstream os;
770 os << "Duplicate alias " << alias;
771 throw invalid_argument(os.str());
772 }
773 aliases.insert(alias);
774 }
775 };
776
777 struct FinalRulePart {
778 int32_t mode;
779 int32_t month;
780 int32_t dom;
781 int32_t dow;
782 int32_t time;
783 int32_t offset; // dst offset, usually either 0 or 1:00
784
785 // Isstd and isgmt only have 3 valid states, corresponding to local
786 // wall time, local standard time, and GMT standard time.
787 // Here is how the isstd & isgmt flags are set by zic:
788 //| case 's': /* Standard */
789 //| rp->r_todisstd = TRUE;
790 //| rp->r_todisgmt = FALSE;
791 //| case 'w': /* Wall */
792 //| rp->r_todisstd = FALSE;
793 //| rp->r_todisgmt = FALSE;
794 //| case 'g': /* Greenwich */
795 //| case 'u': /* Universal */
796 //| case 'z': /* Zulu */
797 //| rp->r_todisstd = TRUE;
798 //| rp->r_todisgmt = TRUE;
799 bool isstd;
800 bool isgmt;
801
802 bool isset; // used during building; later ignored
803
FinalRulePartFinalRulePart804 FinalRulePart() : isset(false) {}
setFinalRulePart805 void set(const string& id,
806 const string& _mode,
807 int32_t _month,
808 int32_t _dom,
809 int32_t _dow,
810 int32_t _time,
811 bool _isstd,
812 bool _isgmt,
813 int32_t _offset) {
814 if (isset) {
815 throw invalid_argument("FinalRulePart set twice");
816 }
817 isset = true;
818 if (_mode == "DOWLEQ") {
819 mode = DOWLEQ;
820 } else if (_mode == "DOWGEQ") {
821 mode = DOWGEQ;
822 } else if (_mode == "DOM") {
823 mode = DOM;
824 } else {
825 throw invalid_argument("Unrecognized FinalRulePart mode");
826 }
827 month = _month;
828 dom = _dom;
829 dow = _dow;
830 time = _time;
831 isstd = _isstd;
832 isgmt = _isgmt;
833 offset = _offset;
834
835 ostringstream os;
836 if (month < 0 || month >= 12) {
837 os << "Invalid input month " << month;
838 }
839 if (dom < 1 || dom > MONTH_LEN[month]) {
840 os << "Invalid input day of month " << dom;
841 }
842 if (mode != DOM && (dow < 0 || dow >= 7)) {
843 os << "Invalid input day of week " << dow;
844 }
845 if (offset < 0 || offset > HOUR) {
846 os << "Invalid input offset " << offset;
847 }
848 if (isgmt && !isstd) {
849 os << "Invalid input isgmt && !isstd";
850 }
851 if (!os.str().empty()) {
852 os << " for rule "
853 << id
854 << _mode
855 << month << dom << dow << time
856 << isstd << isgmt
857 << offset;
858 throw invalid_argument(os.str());
859 }
860 }
861
862 /**
863 * Return the time mode as an ICU SimpleTimeZone int from 0..2;
864 * see simpletz.h.
865 */
timemodeFinalRulePart866 int32_t timemode() const {
867 if (isgmt) {
868 assert(isstd);
869 return 2; // gmt standard
870 }
871 if (isstd) {
872 return 1; // local standard
873 }
874 return 0; // local wall
875 }
876
877 // The SimpleTimeZone encoding method for rules is as follows:
878 // stz_dowim stz_dow
879 // DOM: dom 0
880 // DOWGEQ: dom -(dow+1)
881 // DOWLEQ: -dom -(dow+1)
882 // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2
883 // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2
884 // to encode 7, use stz_dowim=7, stz_dow=0
885 // Note that for this program and for SimpleTimeZone, 0==Jan,
886 // but for this program 0==Sun while for SimpleTimeZone 1==Sun.
887
888 /**
889 * Return a "dowim" param suitable for SimpleTimeZone.
890 */
stz_dowimFinalRulePart891 int32_t stz_dowim() const {
892 return (mode == DOWLEQ) ? -dom : dom;
893 }
894
895 /**
896 * Return a "dow" param suitable for SimpleTimeZone.
897 */
stz_dowFinalRulePart898 int32_t stz_dow() const {
899 return (mode == DOM) ? 0 : -(dow+1);
900 }
901 };
902
903 struct FinalRule {
904 FinalRulePart part[2];
905
issetFinalRule906 bool isset() const {
907 return part[0].isset && part[1].isset;
908 }
909
910 void print(ostream& os) const;
911 };
912
913 map<string,FinalZone> finalZones;
914 map<string,FinalRule> finalRules;
915
916 map<string, set<string> > links;
917 map<string, string> reverseLinks;
918 map<string, string> linkSource; // id => "Olson link" or "ICU alias"
919
920 /**
921 * Predicate used to find FinalRule objects that do not have both
922 * sub-parts set (indicating an error in the input file).
923 */
isNotSet(const pair<const string,FinalRule> & p)924 bool isNotSet(const pair<const string,FinalRule>& p) {
925 return !p.second.isset();
926 }
927
928 /**
929 * Predicate used to find FinalZone objects that do not map to a known
930 * rule (indicating an error in the input file).
931 */
mapsToUnknownRule(const pair<const string,FinalZone> & p)932 bool mapsToUnknownRule(const pair<const string,FinalZone>& p) {
933 return finalRules.find(p.second.ruleid) == finalRules.end();
934 }
935
936 /**
937 * This set is used to make sure each rule in finalRules is used at
938 * least once. First we populate it with all the rules from
939 * finalRules; then we remove all the rules referred to in
 * finalZones.
941 */
942 set<string> ruleIDset;
943
insertRuleID(const pair<string,FinalRule> & p)944 void insertRuleID(const pair<string,FinalRule>& p) {
945 ruleIDset.insert(p.first);
946 }
947
eraseRuleID(const pair<string,FinalZone> & p)948 void eraseRuleID(const pair<string,FinalZone>& p) {
949 ruleIDset.erase(p.second.ruleid);
950 }
951
952 /**
953 * Populate finalZones and finalRules from the given istream.
954 */
readFinalZonesAndRules(istream & in)955 void readFinalZonesAndRules(istream& in) {
956
957 for (;;) {
958 string token;
959 in >> token;
960 if (in.eof() || !in) {
961 break;
962 } else if (token == "zone") {
963 // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0)
964 string id, ruleid;
965 int32_t offset, year;
966 in >> id >> offset >> year >> ruleid;
967 consumeLine(in);
968 finalZones[id] = FinalZone(offset, year, ruleid);
969 } else if (token == "rule") {
970 // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600
971 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0
972 string id, mode;
973 int32_t month, dom, dow, time, offset;
974 bool isstd, isgmt;
975 in >> id >> mode >> month >> dom >> dow >> time >> isstd >> isgmt >> offset;
976 consumeLine(in);
977 FinalRule& fr = finalRules[id];
978 int32_t p = fr.part[0].isset ? 1 : 0;
979 fr.part[p].set(id, mode, month, dom, dow, time, isstd, isgmt, offset);
980 } else if (token == "link") {
981 string fromid, toid; // fromid == "real" zone, toid == alias
982 in >> fromid >> toid;
983 // DO NOT consumeLine(in);
984 if (finalZones.find(toid) != finalZones.end()) {
985 throw invalid_argument("Bad link: `to' id is a \"real\" zone");
986 }
987
988 links[fromid].insert(toid);
989 reverseLinks[toid] = fromid;
990
991 linkSource[fromid] = "Olson link";
992 linkSource[toid] = "Olson link";
993 } else if (token.length() > 0 && token[0] == '#') {
994 consumeLine(in);
995 } else {
996 throw invalid_argument("Unrecognized keyword");
997 }
998 }
999
1000 if (!in.eof() && !in) {
1001 throw invalid_argument("Parse failure");
1002 }
1003
1004 // Perform validity check: Each rule should have data for 2 parts.
1005 if (count_if(finalRules.begin(), finalRules.end(), isNotSet) != 0) {
1006 throw invalid_argument("One or more incomplete rule pairs");
1007 }
1008
1009 // Perform validity check: Each zone should map to a known rule.
1010 if (count_if(finalZones.begin(), finalZones.end(), mapsToUnknownRule) != 0) {
1011 throw invalid_argument("One or more zones refers to an unknown rule");
1012 }
1013
1014 // Perform validity check: Each rule should be referred to by a zone.
1015 ruleIDset.clear();
1016 for_each(finalRules.begin(), finalRules.end(), insertRuleID);
1017 for_each(finalZones.begin(), finalZones.end(), eraseRuleID);
1018 if (ruleIDset.size() != 0) {
1019 throw invalid_argument("Unused rules");
1020 }
1021 }
1022
1023 //--------------------------------------------------------------------
1024 // Resource bundle output
1025 //--------------------------------------------------------------------
1026
1027 // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT
1028
print(ostream & os,const string & id) const1029 void ZoneInfo::print(ostream& os, const string& id) const {
1030 // Implement compressed format #2:
1031
1032 os << " /* " << id << " */ ";
1033
1034 if (aliasTo >= 0) {
1035 assert(aliases.size() == 0);
1036 os << ":int { " << aliasTo << " } "; // No endl - save room for comment.
1037 return;
1038 }
1039
1040 os << ":array {" << endl;
1041
1042 vector<Transition>::const_iterator trn;
1043 vector<ZoneType>::const_iterator typ;
1044
1045 bool first=true;
1046 os << " :intvector { ";
1047 for (trn = transitions.begin(); trn != transitions.end(); ++trn) {
1048 if (!first) os << ", ";
1049 first = false;
1050 os << trn->time;
1051 }
1052 os << " }" << endl;
1053
1054 first=true;
1055 os << " :intvector { ";
1056 for (typ = types.begin(); typ != types.end(); ++typ) {
1057 if (!first) os << ", ";
1058 first = false;
1059 os << typ->rawoffset << ", " << typ->dstoffset;
1060 }
1061 os << " }" << endl;
1062
1063 os << " :bin { \"" << hex << setfill('0');
1064 for (trn = transitions.begin(); trn != transitions.end(); ++trn) {
1065 os << setw(2) << trn->type;
1066 }
1067 os << dec << "\" }" << endl;
1068
1069 // Final zone info, if any
1070 if (finalYear != -1) {
1071 os << " \"" << finalRuleID << "\"" << endl;
1072 os << " :intvector { " << finalOffset << ", "
1073 << finalYear << " }" << endl;
1074 }
1075
1076 // Alias list, if any
1077 if (aliases.size() != 0) {
1078 first = true;
1079 os << " :intvector { ";
1080 for (set<int32_t>::const_iterator i=aliases.begin(); i!=aliases.end(); ++i) {
1081 if (!first) os << ", ";
1082 first = false;
1083 os << *i;
1084 }
1085 os << " }" << endl;
1086 }
1087
1088 os << " } "; // no trailing 'endl', so comments can be placed.
1089 }
1090
1091 inline ostream&
operator <<(ostream & os,const ZoneMap & zoneinfo)1092 operator<<(ostream& os, const ZoneMap& zoneinfo) {
1093 int32_t c = 0;
1094 for (ZoneMapIter it = zoneinfo.begin();
1095 it != zoneinfo.end();
1096 ++it) {
1097 if(c) os << ",";
1098 it->second.print(os, it->first);
1099 os << "//Z#" << c++ << endl;
1100 }
1101 return os;
1102 }
1103
1104 // print the string list
printStringList(ostream & os,const ZoneMap & zoneinfo)1105 ostream& printStringList( ostream& os, const ZoneMap& zoneinfo) {
1106 int32_t n = 0; // count
1107 int32_t col = 0; // column
1108 os << " Names {" << endl
1109 << " ";
1110 for (ZoneMapIter it = zoneinfo.begin();
1111 it != zoneinfo.end();
1112 ++it) {
1113 if(n) {
1114 os << ",";
1115 col ++;
1116 }
1117 const string& id = it->first;
1118 os << "\"" << id << "\"";
1119 col += id.length() + 2;
1120 if(col >= 50) {
1121 os << " // " << n << endl
1122 << " ";
1123 col = 0;
1124 }
1125 n++;
1126 }
1127 os << " // " << (n-1) << endl
1128 << " }" << endl;
1129
1130 return os;
1131 }
1132
1133 //--------------------------------------------------------------------
1134 // main
1135 //--------------------------------------------------------------------
1136
// Binary predicate (callers bind the threshold to make it unary) for
// finding transitions at or after a given time
isAfter(const Transition t,int64_t thresh)1138 bool isAfter(const Transition t, int64_t thresh) {
1139 return t.time >= thresh;
1140 }
1141
1142 /**
1143 * A zone type that contains only the raw and dst offset. Used by the
1144 * optimizeTypeList() method.
1145 */
1146 struct SimplifiedZoneType {
1147 int64_t rawoffset;
1148 int64_t dstoffset;
SimplifiedZoneTypeSimplifiedZoneType1149 SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {}
SimplifiedZoneTypeSimplifiedZoneType1150 SimplifiedZoneType(const ZoneType& t) : rawoffset(t.rawoffset),
1151 dstoffset(t.dstoffset) {}
operator <SimplifiedZoneType1152 bool operator<(const SimplifiedZoneType& t) const {
1153 return rawoffset < t.rawoffset ||
1154 (rawoffset == t.rawoffset &&
1155 dstoffset < t.dstoffset);
1156 }
1157 };
1158
1159 /**
1160 * Construct a ZoneType from a SimplifiedZoneType. Note that this
1161 * discards information; the new ZoneType will have meaningless
1162 * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate,
1163 * since ignoring these is how we do optimization (we have no use for
1164 * these in historical transitions).
1165 */
ZoneType(const SimplifiedZoneType & t)1166 ZoneType::ZoneType(const SimplifiedZoneType& t) :
1167 rawoffset(t.rawoffset), dstoffset(t.dstoffset),
1168 abbr(-1), isdst(false), isstd(false), isgmt(false) {}
1169
1170 /**
1171 * Optimize the type list to remove excess entries. The type list may
1172 * contain entries that are distinct only in terms of their dst, std,
1173 * or gmt flags. Since we don't care about those flags, we can reduce
1174 * the type list to a set of unique raw/dst offset pairs, and remap
1175 * the type indices in the transition list, which stores, for each
1176 * transition, a transition time and a type index.
1177 */
optimizeTypeList()1178 void ZoneInfo::optimizeTypeList() {
1179 // Assemble set of unique types; only those in the `transitions'
1180 // list, since there may be unused types in the `types' list
1181 // corresponding to transitions that have been trimmed (during
1182 // merging of final data).
1183
1184 if (aliasTo >= 0) return; // Nothing to do for aliases
1185
1186 // If there are zero transitions and one type, then leave that as-is.
1187 if (transitions.size() == 0) {
1188 if (types.size() != 1) {
1189 cerr << "Error: transition count = 0, type count = " << types.size() << endl;
1190 }
1191 return;
1192 }
1193
1194 set<SimplifiedZoneType> simpleset;
1195 for (vector<Transition>::const_iterator i=transitions.begin();
1196 i!=transitions.end(); ++i) {
1197 assert(i->type < (int32_t)types.size());
1198 simpleset.insert(types[i->type]);
1199 }
1200
1201 // Map types to integer indices
1202 map<SimplifiedZoneType,int32_t> simplemap;
1203 int32_t n=0;
1204 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin();
1205 i!=simpleset.end(); ++i) {
1206 simplemap[*i] = n++;
1207 }
1208
1209 // Remap transitions
1210 for (vector<Transition>::iterator i=transitions.begin();
1211 i!=transitions.end(); ++i) {
1212 assert(i->type < (int32_t)types.size());
1213 ZoneType oldtype = types[i->type];
1214 SimplifiedZoneType newtype(oldtype);
1215 assert(simplemap.find(newtype) != simplemap.end());
1216 i->type = simplemap[newtype];
1217 }
1218
1219 // Replace type list
1220 types.clear();
1221 copy(simpleset.begin(), simpleset.end(), back_inserter(types));
1222 }
1223
1224 /**
1225 * Merge final zone data into this zone.
1226 */
mergeFinalData(const FinalZone & fz)1227 void ZoneInfo::mergeFinalData(const FinalZone& fz) {
1228 int32_t year = fz.year;
1229 int64_t seconds = yearToSeconds(year);
1230 vector<Transition>::iterator it =
1231 find_if(transitions.begin(), transitions.end(),
1232 bind2nd(ptr_fun(isAfter), seconds));
1233 transitions.erase(it, transitions.end());
1234
1235 if (finalYear != -1) {
1236 throw invalid_argument("Final zone already merged in");
1237 }
1238 finalYear = fz.year;
1239 finalOffset = fz.offset;
1240 finalRuleID = fz.ruleid;
1241 }
1242
1243 /**
1244 * Merge the data from the given final zone into the core zone data by
1245 * calling the ZoneInfo member function mergeFinalData.
1246 */
mergeOne(const string & zoneid,const FinalZone & fz)1247 void mergeOne(const string& zoneid, const FinalZone& fz) {
1248 if (ZONEINFO.find(zoneid) == ZONEINFO.end()) {
1249 throw invalid_argument("Unrecognized final zone ID");
1250 }
1251 ZONEINFO[zoneid].mergeFinalData(fz);
1252 }
1253
/**
 * Visitor function that merges the final zone data into the main zone
 * data structures.  It calls mergeOne once, passing the zone ID and
 * the final zone held in the given pair.
 */
mergeFinalZone(const pair<string,FinalZone> & p)1259 void mergeFinalZone(const pair<string,FinalZone>& p) {
1260 const string& id = p.first;
1261 const FinalZone& fz = p.second;
1262
1263 mergeOne(id, fz);
1264 }
1265
1266 /**
1267 * Print this rule in resource bundle format to os. ID and enclosing
1268 * braces handled elsewhere.
1269 */
print(ostream & os) const1270 void FinalRule::print(ostream& os) const {
1271 // First print the rule part that enters DST; then the rule part
1272 // that exits it.
1273 int32_t whichpart = (part[0].offset != 0) ? 0 : 1;
1274 assert(part[whichpart].offset != 0);
1275 assert(part[1-whichpart].offset == 0);
1276
1277 os << " ";
1278 for (int32_t i=0; i<2; ++i) {
1279 const FinalRulePart& p = part[whichpart];
1280 whichpart = 1-whichpart;
1281 os << p.month << ", " << p.stz_dowim() << ", " << p.stz_dow() << ", "
1282 << p.time << ", " << p.timemode() << ", ";
1283 }
1284 os << part[whichpart].offset << endl;
1285 }
1286
main(int argc,char * argv[])1287 int main(int argc, char *argv[]) {
1288 string rootpath, zonetab, version;
1289
1290 if (argc != 4) {
1291 cout << "Usage: tz2icu <dir> <cmap> <vers>" << endl
1292 << " <dir> path to zoneinfo file tree generated by" << endl
1293 << " ICU-patched version of zic" << endl
1294 << " <cmap> country map, from tzdata archive," << endl
1295 << " typically named \"zone.tab\"" << endl
1296 << " <vers> version string, such as \"2003e\"" << endl;
1297 exit(1);
1298 } else {
1299 rootpath = argv[1];
1300 zonetab = argv[2];
1301 version = argv[3];
1302 }
1303
1304 cout << "Olson data version: " << version << endl;
1305
1306 try {
1307 ifstream finals(ICU_ZONE_FILE);
1308 if (finals) {
1309 readFinalZonesAndRules(finals);
1310
1311 cout << "Finished reading " << finalZones.size()
1312 << " final zones and " << finalRules.size()
1313 << " final rules from " ICU_ZONE_FILE << endl;
1314 } else {
1315 cerr << "Error: Unable to open " ICU_ZONE_FILE << endl;
1316 return 1;
1317 }
1318 } catch (const exception& error) {
1319 cerr << "Error: While reading " ICU_ZONE_FILE ": " << error.what() << endl;
1320 return 1;
1321 }
1322
1323 //############################################################################
1324 //# Note: We no longer use tz.alias to define alias for legacy ICU time zones.
1325 //# The contents of tz.alias were migrated into zic source format and
1326 //# processed by zic as 'Link'.
1327 //############################################################################
1328 #if 0
1329 // Read the legacy alias list and process it. Treat the legacy mappings
1330 // like links, but also record them in the "legacy" hash.
1331 try {
1332 ifstream aliases(ICU_TZ_ALIAS);
1333 if (!aliases) {
1334 cerr << "Error: Unable to open " ICU_TZ_ALIAS << endl;
1335 return 1;
1336 }
1337 int32_t n = 0;
1338 string line;
1339 while (getline(aliases, line)) {
1340 string::size_type lb = line.find('#');
1341 if (lb != string::npos) {
1342 line.resize(lb); // trim comments
1343 }
1344 vector<string> a;
1345 istringstream is(line);
1346 copy(istream_iterator<string>(is),istream_iterator<string>(),
1347 back_inserter(a));
1348 if (a.size() == 0) continue; // blank line
1349 if (a.size() != 2) {
1350 cerr << "Error: Can't parse \"" << line << "\" in "
1351 ICU_TZ_ALIAS << endl;
1352 exit(1);
1353 }
1354 ++n;
1355
1356 string alias(a[0]), olson(a[1]);
1357 if (links.find(alias) != links.end()) {
1358 cerr << "Error: Alias \"" << alias
1359 << "\" is an Olson zone in "
1360 ICU_TZ_ALIAS << endl;
1361 return 1;
1362 }
1363 if (reverseLinks.find(alias) != reverseLinks.end()) {
1364 cerr << "Error: Alias \"" << alias
1365 << "\" is an Olson link to \"" << reverseLinks[olson]
1366 << "\" in " << ICU_TZ_ALIAS << endl;
1367 return 1;
1368 }
1369
1370 // Record source for error reporting
1371 if (linkSource.find(olson) == linkSource.end()) {
1372 linkSource[olson] = "ICU alias";
1373 }
1374 assert(linkSource.find(alias) == linkSource.end());
1375 linkSource[alias] = "ICU alias";
1376
1377 links[olson].insert(alias);
1378 reverseLinks[alias] = olson;
1379 }
1380 cout << "Finished reading " << n
1381 << " aliases from " ICU_TZ_ALIAS << endl;
1382 } catch (const exception& error) {
1383 cerr << "Error: While reading " ICU_TZ_ALIAS ": " << error.what() << endl;
1384 return 1;
1385 }
1386 #endif
1387 try {
1388 // Recursively scan all files below the given path, accumulating
1389 // their data into ZONEINFO. All files must be TZif files. Any
1390 // failure along the way will result in a call to exit(1).
1391 scandir(rootpath);
1392 } catch (const exception& error) {
1393 cerr << "Error: While scanning " << rootpath << ": " << error.what() << endl;
1394 return 1;
1395 }
1396
1397 cout << "Finished reading " << ZONEINFO.size() << " zoneinfo files ["
1398 << (ZONEINFO.begin())->first << ".."
1399 << (--ZONEINFO.end())->first << "]" << endl;
1400
1401 try {
1402 for_each(finalZones.begin(), finalZones.end(), mergeFinalZone);
1403 } catch (const exception& error) {
1404 cerr << "Error: While merging final zone data: " << error.what() << endl;
1405 return 1;
1406 }
1407
1408 // Process links (including ICU aliases). For each link set we have
1409 // a canonical ID (e.g., America/Los_Angeles) and a set of one or more
1410 // aliases (e.g., PST, PST8PDT, ...).
1411
1412 // 1. Add all aliases as zone objects in ZONEINFO
1413 for (map<string,set<string> >::const_iterator i = links.begin();
1414 i!=links.end(); ++i) {
1415 const string& olson = i->first;
1416 const set<string>& aliases = i->second;
1417 if (ZONEINFO.find(olson) == ZONEINFO.end()) {
1418 cerr << "Error: Invalid " << linkSource[olson] << " to non-existent \""
1419 << olson << "\"" << endl;
1420 return 1;
1421 }
1422 for (set<string>::const_iterator j=aliases.begin();
1423 j!=aliases.end(); ++j) {
1424 ZONEINFO[*j] = ZoneInfo();
1425 }
1426 }
1427
1428 // 2. Create a mapping from zones to index numbers 0..n-1.
1429 map<string,int32_t> zoneIDs;
1430 vector<string> zoneIDlist;
1431 int32_t z=0;
1432 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
1433 zoneIDs[i->first] = z++;
1434 zoneIDlist.push_back(i->first);
1435 }
1436 assert(z == (int32_t) ZONEINFO.size());
1437
1438 // 3. Merge aliases. Sometimes aliases link to other aliases; we
1439 // resolve these into simplest possible sets.
1440 map<string,set<string> > links2;
1441 map<string,string> reverse2;
1442 for (map<string,set<string> >::const_iterator i = links.begin();
1443 i!=links.end(); ++i) {
1444 string olson = i->first;
1445 while (reverseLinks.find(olson) != reverseLinks.end()) {
1446 olson = reverseLinks[olson];
1447 }
1448 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) {
1449 links2[olson].insert(*j);
1450 reverse2[*j] = olson;
1451 }
1452 }
1453 links = links2;
1454 reverseLinks = reverse2;
1455
1456 if (false) { // Debugging: Emit link map
1457 for (map<string,set<string> >::const_iterator i = links.begin();
1458 i!=links.end(); ++i) {
1459 cout << i->first << ": ";
1460 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) {
1461 cout << *j << ", ";
1462 }
1463 cout << endl;
1464 }
1465 }
1466
1467 // 4. Update aliases
1468 for (map<string,set<string> >::const_iterator i = links.begin();
1469 i!=links.end(); ++i) {
1470 const string& olson = i->first;
1471 const set<string>& aliases = i->second;
1472 ZONEINFO[olson].clearAliases();
1473 ZONEINFO[olson].addAlias(zoneIDs[olson]);
1474 for (set<string>::const_iterator j=aliases.begin();
1475 j!=aliases.end(); ++j) {
1476 assert(zoneIDs.find(olson) != zoneIDs.end());
1477 assert(zoneIDs.find(*j) != zoneIDs.end());
1478 assert(ZONEINFO.find(*j) != ZONEINFO.end());
1479 ZONEINFO[*j].setAliasTo(zoneIDs[olson]);
1480 ZONEINFO[olson].addAlias(zoneIDs[*j]);
1481 }
1482 }
1483
1484 // Once merging of final data is complete, we can optimize the type list
1485 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
1486 i->second.optimizeTypeList();
1487 }
1488
1489 // Create the country map
1490 map<string, set<string> > countryMap; // country -> set of zones
1491 map<string, string> reverseCountryMap; // zone -> country
1492 try {
1493 ifstream f(zonetab.c_str());
1494 if (!f) {
1495 cerr << "Error: Unable to open " << zonetab << endl;
1496 return 1;
1497 }
1498 int32_t n = 0;
1499 string line;
1500 while (getline(f, line)) {
1501 string::size_type lb = line.find('#');
1502 if (lb != string::npos) {
1503 line.resize(lb); // trim comments
1504 }
1505 string country, coord, zone;
1506 istringstream is(line);
1507 is >> country >> coord >> zone;
1508 if (country.size() == 0) continue;
1509 if (country.size() != 2 || zone.size() < 1) {
1510 cerr << "Error: Can't parse " << line << " in " << zonetab << endl;
1511 return 1;
1512 }
1513 if (ZONEINFO.find(zone) == ZONEINFO.end()) {
1514 cerr << "Error: Country maps to invalid zone " << zone
1515 << " in " << zonetab << endl;
1516 return 1;
1517 }
1518 countryMap[country].insert(zone);
1519 reverseCountryMap[zone] = country;
1520 //cerr << (n+1) << ": " << country << " <=> " << zone << endl;
1521 ++n;
1522 }
1523 cout << "Finished reading " << n
1524 << " country entries from " << zonetab << endl;
1525 } catch (const exception& error) {
1526 cerr << "Error: While reading " << zonetab << ": " << error.what() << endl;
1527 return 1;
1528 }
1529
1530 // Merge ICU aliases into country map. Don't merge any alias
1531 // that already has a country map, since that doesn't make sense.
1532 // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we
1533 // should cross-map the countries between these two zones.
1534 for (map<string,set<string> >::const_iterator i = links.begin();
1535 i!=links.end(); ++i) {
1536 const string& olson(i->first);
1537 if (reverseCountryMap.find(olson) == reverseCountryMap.end()) {
1538 continue;
1539 }
1540 string c = reverseCountryMap[olson];
1541 const set<string>& aliases(i->second);
1542 for (set<string>::const_iterator j=aliases.begin();
1543 j != aliases.end(); ++j) {
1544 if (reverseCountryMap.find(*j) == reverseCountryMap.end()) {
1545 countryMap[c].insert(*j);
1546 reverseCountryMap[*j] = c;
1547 //cerr << "Aliased country: " << c << " <=> " << *j << endl;
1548 }
1549 }
1550 }
1551
1552 // Create a pseudo-country containing all zones belonging to no country
1553 set<string> nocountry;
1554 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
1555 if (reverseCountryMap.find(i->first) == reverseCountryMap.end()) {
1556 nocountry.insert(i->first);
1557 }
1558 }
1559 countryMap[""] = nocountry;
1560
1561 // Get local time & year for below
1562 time_t sec;
1563 time(&sec);
1564 struct tm* now = localtime(&sec);
1565 int32_t thisYear = now->tm_year + 1900;
1566
1567 // Write out a resource-bundle source file containing data for
1568 // all zones.
1569 ofstream file(ICU_TZ_RESOURCE ".txt");
1570 if (file) {
1571 file << "//---------------------------------------------------------" << endl
1572 << "// Copyright (C) 2003";
1573 if (thisYear > 2003) {
1574 file << "-" << thisYear;
1575 }
1576 file << ", International Business Machines" << endl
1577 << "// Corporation and others. All Rights Reserved." << endl
1578 << "//---------------------------------------------------------" << endl
1579 << "// Build tool: tz2icu" << endl
1580 << "// Build date: " << asctime(now) /* << endl -- asctime emits CR */
1581 << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl
1582 << "// Olson version: " << version << endl
1583 << "// ICU version: " << U_ICU_VERSION << endl
1584 << "//---------------------------------------------------------" << endl
1585 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl
1586 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
1587 << "//---------------------------------------------------------" << endl
1588 << endl
1589 << ICU_TZ_RESOURCE ":table(nofallback) {" << endl
1590 << " TZVersion { \"" << version << "\" }" << endl
1591 << " Zones:array { " << endl
1592 << ZONEINFO // Zones (the actual data)
1593 << " }" << endl;
1594
1595 // Names correspond to the Zones list, used for binary searching.
1596 printStringList ( file, ZONEINFO ); // print the Names list
1597
1598 // Final Rules are used if requested by the zone
1599 file << " Rules { " << endl;
1600 // Emit final rules
1601 int32_t frc = 0;
1602 for(map<string,FinalRule>::iterator i=finalRules.begin();
1603 i!=finalRules.end(); ++i) {
1604 const string& id = i->first;
1605 const FinalRule& r = i->second;
1606 file << " " << id << ":intvector {" << endl;
1607 r.print(file);
1608 file << " } //_#" << frc++ << endl;
1609 }
1610 file << " }" << endl;
1611
1612 // Emit country (region) map. Emitting the string zone IDs results
1613 // in a 188 kb binary resource; emitting the zone index numbers
1614 // trims this to 171 kb. More work for the runtime code, but
1615 // a smaller data footprint.
1616 file << " Regions { " << endl;
1617 int32_t rc = 0;
1618 for (map<string, set<string> >::const_iterator i=countryMap.begin();
1619 i != countryMap.end(); ++i) {
1620 string country = i->first;
1621 const set<string>& zones(i->second);
1622 file << " ";
1623 if(country[0]==0) {
1624 file << "Default";
1625 }
1626 file << country << ":intvector { ";
1627 bool first = true;
1628 for (set<string>::const_iterator j=zones.begin();
1629 j != zones.end(); ++j) {
1630 if (!first) file << ", ";
1631 first = false;
1632 if (zoneIDs.find(*j) == zoneIDs.end()) {
1633 cerr << "Error: Nonexistent zone in country map: " << *j << endl;
1634 return 1;
1635 }
1636 file << zoneIDs[*j]; // emit the zone's index number
1637 }
1638 file << " } //R#" << rc++ << endl;
1639 }
1640 file << " }" << endl;
1641
1642 file << "}" << endl;
1643 }
1644
1645 file.close();
1646
1647 if (file) { // recheck error bit
1648 cout << "Finished writing " ICU_TZ_RESOURCE ".txt" << endl;
1649 } else {
1650 cerr << "Error: Unable to open/write to " ICU_TZ_RESOURCE ".txt" << endl;
1651 return 1;
1652 }
1653
1654 #define ICU4J_TZ_CLASS "ZoneMetaData"
1655
1656 // Write out a Java source file containing only a few pieces of
1657 // meta-data missing from the core JDK: the equivalency lists and
1658 // the country map.
1659 ofstream java(ICU4J_TZ_CLASS ".java");
1660 if (java) {
1661 java << "//---------------------------------------------------------" << endl
1662 << "// Copyright (C) 2003";
1663 if (thisYear > 2003) {
1664 java << "-" << thisYear;
1665 }
1666 java << ", International Business Machines" << endl
1667 << "// Corporation and others. All Rights Reserved." << endl
1668 << "//---------------------------------------------------------" << endl
1669 << "// Build tool: tz2icu" << endl
1670 << "// Build date: " << asctime(now) /* << endl -- asctime emits CR */
1671 << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl
1672 << "// Olson version: " << version << endl
1673 << "// ICU version: " << U_ICU_VERSION << endl
1674 << "//---------------------------------------------------------" << endl
1675 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl
1676 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
1677 << "//---------------------------------------------------------" << endl
1678 << endl
1679 << "package com.ibm.icu.impl;" << endl
1680 << endl
1681 << "public final class " ICU4J_TZ_CLASS " {" << endl;
1682
1683 // Emit equivalency lists
1684 bool first1 = true;
1685 java << " public static final String VERSION = \"" + version + "\";" << endl;
1686 java << " public static final String[][] EQUIV = {" << endl;
1687 for (ZoneMap::const_iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
1688 if (i->second.isAlias() || i->second.getAliases().size() == 0) {
1689 continue;
1690 }
1691 if (!first1) java << "," << endl;
1692 first1 = false;
1693 // The ID of this zone (the canonical zone, to which the
1694 // aliases point) will be sorted into the list, so it
1695 // won't be at position 0. If we want to know which is
1696 // the canonical zone, we should move it to position 0.
1697 java << " { ";
1698 bool first2 = true;
1699 const set<int32_t>& s = i->second.getAliases();
1700 for (set<int32_t>::const_iterator j=s.begin(); j!=s.end(); ++j) {
1701 if (!first2) java << ", ";
1702 java << '"' << zoneIDlist[*j] << '"';
1703 first2 = false;
1704 }
1705 java << " }";
1706 }
1707 java << endl
1708 << " };" << endl;
1709
1710 // Emit country map.
1711 first1 = true;
1712 java << " public static final String[][] COUNTRY = {" << endl;
1713 for (map<string, set<string> >::const_iterator i=countryMap.begin();
1714 i != countryMap.end(); ++i) {
1715 if (!first1) java << "," << endl;
1716 first1 = false;
1717 string country = i->first;
1718 const set<string>& zones(i->second);
1719 java << " { \"" << country << '"';
1720 for (set<string>::const_iterator j=zones.begin();
1721 j != zones.end(); ++j) {
1722 java << ", \"" << *j << '"';
1723 }
1724 java << " }";
1725 }
1726 java << endl
1727 << " };" << endl;
1728
1729 java << "}" << endl;
1730 }
1731
1732 java.close();
1733
1734 if (java) { // recheck error bit
1735 cout << "Finished writing " ICU4J_TZ_CLASS ".java" << endl;
1736 } else {
1737 cerr << "Error: Unable to open/write to " ICU4J_TZ_CLASS ".java" << endl;
1738 return 1;
1739 }
1740
1741 return 0;
1742 }
1743
1744 //eof
1745