1 #ifndef DATE_TIME_TZ_DB_BASE_HPP__ 2 #define DATE_TIME_TZ_DB_BASE_HPP__ 3 4 /* Copyright (c) 2003-2005 CrystalClear Software, Inc. 5 * Subject to the Boost Software License, Version 1.0. 6 * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 7 * Author: Jeff Garland, Bart Garst 8 * $Date$ 9 */ 10 11 #include <map> 12 #include <vector> 13 #include <string> 14 #include <sstream> 15 #include <fstream> 16 #include <stdexcept> 17 #include <boost/tokenizer.hpp> 18 #include <boost/shared_ptr.hpp> 19 #include <boost/throw_exception.hpp> 20 #include <boost/date_time/compiler_config.hpp> 21 #include <boost/date_time/time_zone_names.hpp> 22 #include <boost/date_time/time_zone_base.hpp> 23 #include <boost/date_time/time_parsing.hpp> 24 #include <boost/algorithm/string.hpp> 25 26 namespace boost { 27 namespace date_time { 28 29 //! Exception thrown when tz database cannot locate requested data file 30 class data_not_accessible : public std::logic_error 31 { 32 public: data_not_accessible()33 data_not_accessible() : 34 std::logic_error(std::string("Unable to locate or access the required datafile.")) 35 {} data_not_accessible(const std::string & filespec)36 data_not_accessible(const std::string& filespec) : 37 std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: " + filespec)) 38 {} 39 }; 40 41 //! Exception thrown when tz database locates incorrect field structure in data file 42 class bad_field_count : public std::out_of_range 43 { 44 public: bad_field_count(const std::string & s)45 bad_field_count(const std::string& s) : 46 std::out_of_range(s) 47 {} 48 }; 49 50 //! Creates a database of time_zones from csv datafile 51 /*! The csv file containing the zone_specs used by the 52 * tz_db_base is intended to be customized by the 53 * library user. When customizing this file (or creating your own) the 54 * file must follow a specific format. 55 * 56 * This first line is expected to contain column headings and is therefore 57 * not processed by the tz_db_base. 58 * 59 * Each record (line) must have eleven fields. Some of those fields can 60 * be empty. Every field (even empty ones) must be enclosed in 61 * double-quotes. 62 * Ex: 63 * @code 64 * "America/Phoenix" <- string enclosed in quotes 65 * "" <- empty field 66 * @endcode 67 * 68 * Some fields represent a length of time. The format of these fields 69 * must be: 70 * @code 71 * "{+|-}hh:mm[:ss]" <- length-of-time format 72 * @endcode 73 * Where the plus or minus is mandatory and the seconds are optional. 74 * 75 * Since some time zones do not use daylight savings it is not always 76 * necessary for every field in a zone_spec to contain a value. All 77 * zone_specs must have at least ID and GMT offset. Zones that use 78 * daylight savings must have all fields filled except: 79 * STD ABBR, STD NAME, DST NAME. You should take note 80 * that DST ABBR is mandatory for zones that use daylight savings 81 * (see field descriptions for further details). 82 * 83 * ******* Fields and their description/details ********* 84 * 85 * ID: 86 * Contains the identifying string for the zone_spec. Any string will 87 * do as long as it's unique. No two ID's can be the same. 88 * 89 * STD ABBR: 90 * STD NAME: 91 * DST ABBR: 92 * DST NAME: 93 * These four are all the names and abbreviations used by the time 94 * zone being described. While any string will do in these fields, 95 * care should be taken. These fields hold the strings that will be 96 * used in the output of many of the local_time classes. 97 * Ex: 98 * @code 99 * time_zone nyc = tz_db.time_zone_from_region("America/New_York"); 100 * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); 101 * cout << ny_time.to_long_string() << endl; 102 * // 2004-Aug-30 00:00:00 Eastern Daylight Time 103 * cout << ny_time.to_short_string() << endl; 104 * // 2004-Aug-30 00:00:00 EDT 105 * @endcode 106 * 107 * NOTE: The exact format/function names may vary - see local_time 108 * documentation for further details. 109 * 110 * GMT offset: 111 * This is the number of hours added to utc to get the local time 112 * before any daylight savings adjustments are made. Some examples 113 * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. 114 * The format must follow the length-of-time format described above. 115 * 116 * DST adjustment: 117 * The amount of time added to gmt_offset when daylight savings is in 118 * effect. The format must follow the length-of-time format described 119 * above. 120 * 121 * DST Start Date rule: 122 * This is a specially formatted string that describes the day of year 123 * in which the transition take place. It holds three fields of it's own, 124 * separated by semicolons. 125 * The first field indicates the "nth" weekday of the month. The possible 126 * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), 127 * and -1 (last). 128 * The second field indicates the day-of-week from 0-6 (Sun=0). 129 * The third field indicates the month from 1-12 (Jan=1). 130 * 131 * Examples are: "-1;5;9"="Last Friday of September", 132 * "2;1;3"="Second Monday of March" 133 * 134 * Start time: 135 * Start time is the number of hours past midnight, on the day of the 136 * start transition, the transition takes place. More simply put, the 137 * time of day the transition is made (in 24 hours format). The format 138 * must follow the length-of-time format described above with the 139 * exception that it must always be positive. 140 * 141 * DST End date rule: 142 * See DST Start date rule. The difference here is this is the day 143 * daylight savings ends (transition to STD). 144 * 145 * End time: 146 * Same as Start time. 147 */ 148 template<class time_zone_type, class rule_type> 149 class tz_db_base { 150 public: 151 /* Having CharT as a template parameter created problems 152 * with posix_time::duration_from_string. Templatizing 153 * duration_from_string was not possible at this time, however, 154 * it should be possible in the future (when poor compilers get 155 * fixed or stop being used). 156 * Since this class was designed to use CharT as a parameter it 157 * is simply typedef'd here to ease converting in back to a 158 * parameter the future */ 159 typedef char char_type; 160 161 typedef typename time_zone_type::base_type time_zone_base_type; 162 typedef typename time_zone_type::time_duration_type time_duration_type; 163 typedef time_zone_names_base<char_type> time_zone_names; 164 typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; 165 typedef std::basic_string<char_type> string_type; 166 167 //! Constructs an empty database tz_db_base()168 tz_db_base() {} 169 170 //! Process csv data file, may throw exceptions 171 /*! May throw bad_field_count exceptions */ load_from_stream(std::istream & in)172 void load_from_stream(std::istream &in) 173 { 174 std::string buff; 175 while( std::getline(in, buff)) { 176 boost::trim_right(buff); 177 parse_string(buff); 178 } 179 } 180 181 //! Process csv data file, may throw exceptions 182 /*! May throw data_not_accessible, or bad_field_count exceptions */ load_from_file(const std::string & pathspec)183 void load_from_file(const std::string& pathspec) 184 { 185 std::string buff; 186 187 std::ifstream ifs(pathspec.c_str()); 188 if(!ifs){ 189 boost::throw_exception(data_not_accessible(pathspec)); 190 } 191 std::getline(ifs, buff); // first line is column headings 192 this->load_from_stream(ifs); 193 } 194 195 //! returns true if record successfully added to map 196 /*! Takes a region name in the form of "America/Phoenix", and a 197 * time_zone object for that region. The id string must be a unique 198 * name that does not already exist in the database. */ add_record(const string_type & region,boost::shared_ptr<time_zone_base_type> tz)199 bool add_record(const string_type& region, 200 boost::shared_ptr<time_zone_base_type> tz) 201 { 202 typename map_type::value_type p(region, tz); 203 return (m_zone_map.insert(p)).second; 204 } 205 206 //! Returns a time_zone object built from the specs for the given region 207 /*! Returns a time_zone object built from the specs for the given 208 * region. If region does not exist a local_time::record_not_found 209 * exception will be thrown */ 210 boost::shared_ptr<time_zone_base_type> time_zone_from_region(const string_type & region) const211 time_zone_from_region(const string_type& region) const 212 { 213 // get the record 214 typename map_type::const_iterator record = m_zone_map.find(region); 215 if(record == m_zone_map.end()){ 216 return boost::shared_ptr<time_zone_base_type>(); //null pointer 217 } 218 return record->second; 219 } 220 221 //! Returns a vector of strings holding the time zone regions in the database region_list() const222 std::vector<std::string> region_list() const 223 { 224 typedef std::vector<std::string> vector_type; 225 vector_type regions; 226 typename map_type::const_iterator itr = m_zone_map.begin(); 227 while(itr != m_zone_map.end()) { 228 regions.push_back(itr->first); 229 ++itr; 230 } 231 return regions; 232 } 233 234 private: 235 typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; 236 map_type m_zone_map; 237 238 // start and end rule are of the same type 239 typedef typename rule_type::start_rule::week_num week_num; 240 241 /* TODO: mechanisms need to be put in place to handle different 242 * types of rule specs. parse_rules() only handles nth_kday 243 * rule types. */ 244 245 //! parses rule specs for transition day rules parse_rules(const string_type & sr,const string_type & er) const246 rule_type* parse_rules(const string_type& sr, const string_type& er) const 247 { 248 // start and end rule are of the same type, 249 // both are included here for readability 250 typedef typename rule_type::start_rule start_rule; 251 typedef typename rule_type::end_rule end_rule; 252 253 // these are: [start|end] nth, day, month 254 int s_nth = 0, s_d = 0, s_m = 0; 255 int e_nth = 0, e_d = 0, e_m = 0; 256 split_rule_spec(s_nth, s_d, s_m, sr); 257 split_rule_spec(e_nth, e_d, e_m, er); 258 259 typename start_rule::week_num s_wn, e_wn; 260 s_wn = get_week_num(s_nth); 261 e_wn = get_week_num(e_nth); 262 263 264 return new rule_type(start_rule(s_wn, 265 static_cast<unsigned short>(s_d), 266 static_cast<unsigned short>(s_m)), 267 end_rule(e_wn, 268 static_cast<unsigned short>(e_d), 269 static_cast<unsigned short>(e_m))); 270 } 271 //! helper function for parse_rules() get_week_num(int nth) const272 week_num get_week_num(int nth) const 273 { 274 typedef typename rule_type::start_rule start_rule; 275 switch(nth){ 276 case 1: 277 return start_rule::first; 278 case 2: 279 return start_rule::second; 280 case 3: 281 return start_rule::third; 282 case 4: 283 return start_rule::fourth; 284 case 5: 285 case -1: 286 return start_rule::fifth; 287 default: 288 // shouldn't get here - add error handling later 289 break; 290 } 291 return start_rule::fifth; // silence warnings 292 } 293 294 //! splits the [start|end]_date_rule string into 3 ints split_rule_spec(int & nth,int & d,int & m,string_type rule) const295 void split_rule_spec(int& nth, int& d, int& m, string_type rule) const 296 { 297 typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type; 298 typedef boost::tokenizer<char_separator_type, 299 std::basic_string<char_type>::const_iterator, 300 std::basic_string<char_type> > tokenizer; 301 typedef boost::tokenizer<char_separator_type, 302 std::basic_string<char_type>::const_iterator, 303 std::basic_string<char_type> >::iterator tokenizer_iterator; 304 305 const char_type sep_char[] = { ';', '\0'}; 306 char_separator_type sep(sep_char); 307 tokenizer tokens(rule, sep); // 3 fields 308 309 if ( std::distance ( tokens.begin(), tokens.end ()) != 3 ) { 310 std::ostringstream msg; 311 msg << "Expecting 3 fields, got " 312 << std::distance ( tokens.begin(), tokens.end ()) 313 << " fields in line: " << rule; 314 boost::throw_exception(bad_field_count(msg.str())); 315 } 316 317 tokenizer_iterator tok_iter = tokens.begin(); 318 nth = std::atoi(tok_iter->c_str()); ++tok_iter; 319 d = std::atoi(tok_iter->c_str()); ++tok_iter; 320 m = std::atoi(tok_iter->c_str()); 321 } 322 323 324 //! Take a line from the csv, turn it into a time_zone_type. 325 /*! Take a line from the csv, turn it into a time_zone_type, 326 * and add it to the map. Zone_specs in csv file are expected to 327 * have eleven fields that describe the time zone. Returns true if 328 * zone_spec successfully added to database */ parse_string(string_type & s)329 bool parse_string(string_type& s) 330 { 331 std::vector<string_type> result; 332 typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type; 333 334 token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>()); 335 336 token_iter_type end; 337 while (i != end) { 338 result.push_back(*i); 339 i++; 340 } 341 342 enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, 343 DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, 344 END_TIME, FIELD_COUNT }; 345 346 //take a shot at fixing gcc 4.x error 347 const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); 348 if (result.size() != expected_fields) { 349 std::ostringstream msg; 350 msg << "Expecting " << FIELD_COUNT << " fields, got " 351 << result.size() << " fields in line: " << s; 352 boost::throw_exception(bad_field_count(msg.str())); 353 BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach 354 } 355 356 // initializations 357 bool has_dst = true; 358 if(result[DSTABBR] == std::string()){ 359 has_dst = false; 360 } 361 362 363 // start building components of a time_zone 364 time_zone_names names(result[STDNAME], result[STDABBR], 365 result[DSTNAME], result[DSTABBR]); 366 367 time_duration_type utc_offset = 368 str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]); 369 370 dst_adjustment_offsets adjust(time_duration_type(0,0,0), 371 time_duration_type(0,0,0), 372 time_duration_type(0,0,0)); 373 374 boost::shared_ptr<rule_type> rules; 375 376 if(has_dst){ 377 adjust = dst_adjustment_offsets( 378 str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]), 379 str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]), 380 str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME]) 381 ); 382 383 rules = 384 boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE], 385 result[END_DATE_RULE])); 386 } 387 string_type id(result[ID]); 388 boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); 389 return (add_record(id, zone)); 390 391 } 392 393 }; 394 395 } } // namespace 396 397 #endif // DATE_TIME_TZ_DB_BASE_HPP__ 398