1# -*- coding: utf-8 -*- 2""" 3This module offers a generic date/time string parser which is able to parse 4most known formats to represent a date and/or time. 5 6This module attempts to be forgiving with regards to unlikely input formats, 7returning a datetime object even for dates which are ambiguous. If an element 8of a date/time stamp is omitted, the following rules are applied: 9 10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour 11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is 12 specified. 13- If a time zone is omitted, a timezone-naive datetime is returned. 14 15If any other elements are missing, they are taken from the 16:class:`datetime.datetime` object passed to the parameter ``default``. If this 17results in a day number exceeding the valid number of days per month, the 18value falls back to the end of the month. 19 20Additional resources about date/time string formats can be found below: 21 22- `A summary of the international standard date and time notation 23 <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ 24- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_ 25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ 26- `CPAN ParseDate module 27 <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ 28- `Java SimpleDateFormat Class 29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ 30""" 31from __future__ import unicode_literals 32 33import datetime 34import re 35import string 36import time 37import warnings 38 39from calendar import monthrange 40from io import StringIO 41 42import six 43from six import binary_type, integer_types, text_type 44 45from decimal import Decimal 46 47from warnings import warn 48 49from .. import relativedelta 50from .. import tz 51 52__all__ = ["parse", "parserinfo"] 53 54 55# TODO: pandas.core.tools.datetimes imports this explicitly. 
Might be worth 56# making public and/or figuring out if there is something we can 57# take off their plate. 58class _timelex(object): 59 # Fractional seconds are sometimes split by a comma 60 _split_decimal = re.compile("([.,])") 61 62 def __init__(self, instream): 63 if six.PY2: 64 # In Python 2, we can't duck type properly because unicode has 65 # a 'decode' function, and we'd be double-decoding 66 if isinstance(instream, (binary_type, bytearray)): 67 instream = instream.decode() 68 else: 69 if getattr(instream, 'decode', None) is not None: 70 instream = instream.decode() 71 72 if isinstance(instream, text_type): 73 instream = StringIO(instream) 74 elif getattr(instream, 'read', None) is None: 75 raise TypeError('Parser must be a string or character stream, not ' 76 '{itype}'.format(itype=instream.__class__.__name__)) 77 78 self.instream = instream 79 self.charstack = [] 80 self.tokenstack = [] 81 self.eof = False 82 83 def get_token(self): 84 """ 85 This function breaks the time string into lexical units (tokens), which 86 can be parsed by the parser. Lexical units are demarcated by changes in 87 the character set, so any continuous string of letters is considered 88 one unit, any continuous string of numbers is considered one unit. 89 90 The main complication arises from the fact that dots ('.') can be used 91 both as separators (e.g. "Sep.20.2009") or decimal points (e.g. 92 "4:30:21.447"). As such, it is necessary to read the full context of 93 any dot-separated strings before breaking it into tokens; as such, this 94 function maintains a "token stack", for when the ambiguous context 95 demands that multiple tokens be parsed at once. 
96 """ 97 if self.tokenstack: 98 return self.tokenstack.pop(0) 99 100 seenletters = False 101 token = None 102 state = None 103 104 while not self.eof: 105 # We only realize that we've reached the end of a token when we 106 # find a character that's not part of the current token - since 107 # that character may be part of the next token, it's stored in the 108 # charstack. 109 if self.charstack: 110 nextchar = self.charstack.pop(0) 111 else: 112 nextchar = self.instream.read(1) 113 while nextchar == '\x00': 114 nextchar = self.instream.read(1) 115 116 if not nextchar: 117 self.eof = True 118 break 119 elif not state: 120 # First character of the token - determines if we're starting 121 # to parse a word, a number or something else. 122 token = nextchar 123 if self.isword(nextchar): 124 state = 'a' 125 elif self.isnum(nextchar): 126 state = '0' 127 elif self.isspace(nextchar): 128 token = ' ' 129 break # emit token 130 else: 131 break # emit token 132 elif state == 'a': 133 # If we've already started reading a word, we keep reading 134 # letters until we find something that's not part of a word. 135 seenletters = True 136 if self.isword(nextchar): 137 token += nextchar 138 elif nextchar == '.': 139 token += nextchar 140 state = 'a.' 141 else: 142 self.charstack.append(nextchar) 143 break # emit token 144 elif state == '0': 145 # If we've already started reading a number, we keep reading 146 # numbers until we find something that doesn't fit. 147 if self.isnum(nextchar): 148 token += nextchar 149 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2): 150 token += nextchar 151 state = '0.' 152 else: 153 self.charstack.append(nextchar) 154 break # emit token 155 elif state == 'a.': 156 # If we've seen some letters and a dot separator, continue 157 # parsing, and the tokens will be broken up later. 158 seenletters = True 159 if nextchar == '.' 
or self.isword(nextchar): 160 token += nextchar 161 elif self.isnum(nextchar) and token[-1] == '.': 162 token += nextchar 163 state = '0.' 164 else: 165 self.charstack.append(nextchar) 166 break # emit token 167 elif state == '0.': 168 # If we've seen at least one dot separator, keep going, we'll 169 # break up the tokens later. 170 if nextchar == '.' or self.isnum(nextchar): 171 token += nextchar 172 elif self.isword(nextchar) and token[-1] == '.': 173 token += nextchar 174 state = 'a.' 175 else: 176 self.charstack.append(nextchar) 177 break # emit token 178 179 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or 180 token[-1] in '.,')): 181 l = self._split_decimal.split(token) 182 token = l[0] 183 for tok in l[1:]: 184 if tok: 185 self.tokenstack.append(tok) 186 187 if state == '0.' and token.count('.') == 0: 188 token = token.replace(',', '.') 189 190 return token 191 192 def __iter__(self): 193 return self 194 195 def __next__(self): 196 token = self.get_token() 197 if token is None: 198 raise StopIteration 199 200 return token 201 202 def next(self): 203 return self.__next__() # Python 2.x support 204 205 @classmethod 206 def split(cls, s): 207 return list(cls(s)) 208 209 @classmethod 210 def isword(cls, nextchar): 211 """ Whether or not the next character is part of a word """ 212 return nextchar.isalpha() 213 214 @classmethod 215 def isnum(cls, nextchar): 216 """ Whether the next character is part of a number """ 217 return nextchar.isdigit() 218 219 @classmethod 220 def isspace(cls, nextchar): 221 """ Whether the next character is whitespace """ 222 return nextchar.isspace() 223 224 225class _resultbase(object): 226 227 def __init__(self): 228 for attr in self.__slots__: 229 setattr(self, attr, None) 230 231 def _repr(self, classname): 232 l = [] 233 for attr in self.__slots__: 234 value = getattr(self, attr) 235 if value is not None: 236 l.append("%s=%s" % (attr, repr(value))) 237 return "%s(%s)" % (classname, ", ".join(l)) 238 239 def 
__len__(self): 240 return (sum(getattr(self, attr) is not None 241 for attr in self.__slots__)) 242 243 def __repr__(self): 244 return self._repr(self.__class__.__name__) 245 246 247class parserinfo(object): 248 """ 249 Class which handles what inputs are accepted. Subclass this to customize 250 the language and acceptable values for each parameter. 251 252 :param dayfirst: 253 Whether to interpret the first value in an ambiguous 3-integer date 254 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 255 ``yearfirst`` is set to ``True``, this distinguishes between YDM 256 and YMD. Default is ``False``. 257 258 :param yearfirst: 259 Whether to interpret the first value in an ambiguous 3-integer date 260 (e.g. 01/05/09) as the year. If ``True``, the first number is taken 261 to be the year, otherwise the last number is taken to be the year. 262 Default is ``False``. 263 """ 264 265 # m from a.m/p.m, t from ISO T separator 266 JUMP = [" ", ".", ",", ";", "-", "/", "'", 267 "at", "on", "and", "ad", "m", "t", "of", 268 "st", "nd", "rd", "th"] 269 270 WEEKDAYS = [("Mon", "Monday"), 271 ("Tue", "Tuesday"), # TODO: "Tues" 272 ("Wed", "Wednesday"), 273 ("Thu", "Thursday"), # TODO: "Thurs" 274 ("Fri", "Friday"), 275 ("Sat", "Saturday"), 276 ("Sun", "Sunday")] 277 MONTHS = [("Jan", "January"), 278 ("Feb", "February"), # TODO: "Febr" 279 ("Mar", "March"), 280 ("Apr", "April"), 281 ("May", "May"), 282 ("Jun", "June"), 283 ("Jul", "July"), 284 ("Aug", "August"), 285 ("Sep", "Sept", "September"), 286 ("Oct", "October"), 287 ("Nov", "November"), 288 ("Dec", "December")] 289 HMS = [("h", "hour", "hours"), 290 ("m", "minute", "minutes"), 291 ("s", "second", "seconds")] 292 AMPM = [("am", "a"), 293 ("pm", "p")] 294 UTCZONE = ["UTC", "GMT", "Z"] 295 PERTAIN = ["of"] 296 TZOFFSET = {} 297 # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate", 298 # "Anno Domini", "Year of Our Lord"] 299 300 def __init__(self, dayfirst=False, yearfirst=False): 301 self._jump = 
self._convert(self.JUMP) 302 self._weekdays = self._convert(self.WEEKDAYS) 303 self._months = self._convert(self.MONTHS) 304 self._hms = self._convert(self.HMS) 305 self._ampm = self._convert(self.AMPM) 306 self._utczone = self._convert(self.UTCZONE) 307 self._pertain = self._convert(self.PERTAIN) 308 309 self.dayfirst = dayfirst 310 self.yearfirst = yearfirst 311 312 self._year = time.localtime().tm_year 313 self._century = self._year // 100 * 100 314 315 def _convert(self, lst): 316 dct = {} 317 for i, v in enumerate(lst): 318 if isinstance(v, tuple): 319 for v in v: 320 dct[v.lower()] = i 321 else: 322 dct[v.lower()] = i 323 return dct 324 325 def jump(self, name): 326 return name.lower() in self._jump 327 328 def weekday(self, name): 329 try: 330 return self._weekdays[name.lower()] 331 except KeyError: 332 pass 333 return None 334 335 def month(self, name): 336 try: 337 return self._months[name.lower()] + 1 338 except KeyError: 339 pass 340 return None 341 342 def hms(self, name): 343 try: 344 return self._hms[name.lower()] 345 except KeyError: 346 return None 347 348 def ampm(self, name): 349 try: 350 return self._ampm[name.lower()] 351 except KeyError: 352 return None 353 354 def pertain(self, name): 355 return name.lower() in self._pertain 356 357 def utczone(self, name): 358 return name.lower() in self._utczone 359 360 def tzoffset(self, name): 361 if name in self._utczone: 362 return 0 363 364 return self.TZOFFSET.get(name) 365 366 def convertyear(self, year, century_specified=False): 367 if year < 100 and not century_specified: 368 year += self._century 369 if abs(year - self._year) >= 50: 370 if year < self._year: 371 year += 100 372 else: 373 year -= 100 374 return year 375 376 def validate(self, res): 377 # move to info 378 if res.year is not None: 379 res.year = self.convertyear(res.year, res.century_specified) 380 381 if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z': 382 res.tzname = "UTC" 383 res.tzoffset = 0 384 elif res.tzoffset != 0 
and res.tzname and self.utczone(res.tzname): 385 res.tzoffset = 0 386 return True 387 388 389class _ymd(list): 390 def __init__(self, *args, **kwargs): 391 super(self.__class__, self).__init__(*args, **kwargs) 392 self.century_specified = False 393 self.dstridx = None 394 self.mstridx = None 395 self.ystridx = None 396 397 @property 398 def has_year(self): 399 return self.ystridx is not None 400 401 @property 402 def has_month(self): 403 return self.mstridx is not None 404 405 @property 406 def has_day(self): 407 return self.dstridx is not None 408 409 def could_be_day(self, value): 410 if self.has_day: 411 return False 412 elif not self.has_month: 413 return 1 <= value <= 31 414 elif not self.has_year: 415 # Be permissive, assume leapyear 416 month = self[self.mstridx] 417 return 1 <= value <= monthrange(2000, month)[1] 418 else: 419 month = self[self.mstridx] 420 year = self[self.ystridx] 421 return 1 <= value <= monthrange(year, month)[1] 422 423 def append(self, val, label=None): 424 if hasattr(val, '__len__'): 425 if val.isdigit() and len(val) > 2: 426 self.century_specified = True 427 if label not in [None, 'Y']: # pragma: no cover 428 raise ValueError(label) 429 label = 'Y' 430 elif val > 100: 431 self.century_specified = True 432 if label not in [None, 'Y']: # pragma: no cover 433 raise ValueError(label) 434 label = 'Y' 435 436 super(self.__class__, self).append(int(val)) 437 438 if label == 'M': 439 if self.has_month: 440 raise ValueError('Month is already set') 441 self.mstridx = len(self) - 1 442 elif label == 'D': 443 if self.has_day: 444 raise ValueError('Day is already set') 445 self.dstridx = len(self) - 1 446 elif label == 'Y': 447 if self.has_year: 448 raise ValueError('Year is already set') 449 self.ystridx = len(self) - 1 450 451 def resolve_ymd(self, yearfirst, dayfirst): 452 len_ymd = len(self) 453 year, month, day = (None, None, None) 454 455 mstridx = self.mstridx 456 457 if len_ymd > 3: 458 raise ValueError("More than three YMD values") 459 
elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): 460 # One member, or two members with a month string 461 if mstridx is not None: 462 month = self[mstridx] 463 del self[mstridx] 464 465 if len_ymd > 1 or mstridx is None: 466 if self[0] > 31: 467 year = self[0] 468 else: 469 day = self[0] 470 471 elif len_ymd == 2: 472 # Two members with numbers 473 if self[0] > 31: 474 # 99-01 475 year, month = self 476 elif self[1] > 31: 477 # 01-99 478 month, year = self 479 elif dayfirst and self[1] <= 12: 480 # 13-01 481 day, month = self 482 else: 483 # 01-13 484 month, day = self 485 486 elif len_ymd == 3: 487 # Three members 488 if mstridx == 0: 489 if self[1] > 31: 490 # Apr-2003-25 491 month, year, day = self 492 else: 493 month, day, year = self 494 elif mstridx == 1: 495 if self[0] > 31 or (yearfirst and self[2] <= 31): 496 # 99-Jan-01 497 year, month, day = self 498 else: 499 # 01-Jan-01 500 # Give precendence to day-first, since 501 # two-digit years is usually hand-written. 502 day, month, year = self 503 504 elif mstridx == 2: 505 # WTF!? 506 if self[1] > 31: 507 # 01-99-Jan 508 day, year, month = self 509 else: 510 # 99-01-Jan 511 year, day, month = self 512 513 else: 514 if (self[0] > 31 or 515 self.ystridx == 0 or 516 (yearfirst and self[1] <= 12 and self[2] <= 31)): 517 # 99-01-01 518 if dayfirst and self[2] <= 12: 519 year, day, month = self 520 else: 521 year, month, day = self 522 elif self[0] > 12 or (dayfirst and self[1] <= 12): 523 # 13-01-01 524 day, month, year = self 525 else: 526 # 01-13-01 527 month, day, year = self 528 529 return year, month, day 530 531 532class parser(object): 533 def __init__(self, info=None): 534 self.info = info or parserinfo() 535 536 def parse(self, timestr, default=None, 537 ignoretz=False, tzinfos=None, **kwargs): 538 """ 539 Parse the date/time string into a :class:`datetime.datetime` object. 540 541 :param timestr: 542 Any date/time string using the supported formats. 
543 544 :param default: 545 The default datetime object, if this is a datetime object and not 546 ``None``, elements specified in ``timestr`` replace elements in the 547 default object. 548 549 :param ignoretz: 550 If set ``True``, time zones in parsed strings are ignored and a 551 naive :class:`datetime.datetime` object is returned. 552 553 :param tzinfos: 554 Additional time zone names / aliases which may be present in the 555 string. This argument maps time zone names (and optionally offsets 556 from those time zones) to time zones. This parameter can be a 557 dictionary with timezone aliases mapping time zone names to time 558 zones or a function taking two parameters (``tzname`` and 559 ``tzoffset``) and returning a time zone. 560 561 The timezones to which the names are mapped can be an integer 562 offset from UTC in seconds or a :class:`tzinfo` object. 563 564 .. doctest:: 565 :options: +NORMALIZE_WHITESPACE 566 567 >>> from dateutil.parser import parse 568 >>> from dateutil.tz import gettz 569 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 570 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 571 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 572 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 573 datetime.datetime(2012, 1, 19, 17, 21, 574 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 575 576 This parameter is ignored if ``ignoretz`` is set. 577 578 :param \\*\\*kwargs: 579 Keyword arguments as passed to ``_parse()``. 580 581 :return: 582 Returns a :class:`datetime.datetime` object or, if the 583 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 584 first element being a :class:`datetime.datetime` object, the second 585 a tuple containing the fuzzy tokens. 586 587 :raises ValueError: 588 Raised for invalid or unknown string format, if the provided 589 :class:`tzinfo` is not in a valid format, or if an invalid date 590 would be created. 
591 592 :raises TypeError: 593 Raised for non-string or character stream input. 594 595 :raises OverflowError: 596 Raised if the parsed date exceeds the largest valid C integer on 597 your system. 598 """ 599 600 if default is None: 601 default = datetime.datetime.now().replace(hour=0, minute=0, 602 second=0, microsecond=0) 603 604 res, skipped_tokens = self._parse(timestr, **kwargs) 605 606 if res is None: 607 raise ValueError("Unknown string format:", timestr) 608 609 if len(res) == 0: 610 raise ValueError("String does not contain a date:", timestr) 611 612 ret = self._build_naive(res, default) 613 614 if not ignoretz: 615 ret = self._build_tzaware(ret, res, tzinfos) 616 617 if kwargs.get('fuzzy_with_tokens', False): 618 return ret, skipped_tokens 619 else: 620 return ret 621 622 class _result(_resultbase): 623 __slots__ = ["year", "month", "day", "weekday", 624 "hour", "minute", "second", "microsecond", 625 "tzname", "tzoffset", "ampm","any_unused_tokens"] 626 627 def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False, 628 fuzzy_with_tokens=False): 629 """ 630 Private method which performs the heavy lifting of parsing, called from 631 ``parse()``, which passes on its ``kwargs`` to this function. 632 633 :param timestr: 634 The string to parse. 635 636 :param dayfirst: 637 Whether to interpret the first value in an ambiguous 3-integer date 638 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 639 ``yearfirst`` is set to ``True``, this distinguishes between YDM 640 and YMD. If set to ``None``, this value is retrieved from the 641 current :class:`parserinfo` object (which itself defaults to 642 ``False``). 643 644 :param yearfirst: 645 Whether to interpret the first value in an ambiguous 3-integer date 646 (e.g. 01/05/09) as the year. If ``True``, the first number is taken 647 to be the year, otherwise the last number is taken to be the year. 
648 If this is set to ``None``, the value is retrieved from the current 649 :class:`parserinfo` object (which itself defaults to ``False``). 650 651 :param fuzzy: 652 Whether to allow fuzzy parsing, allowing for string like "Today is 653 January 1, 2047 at 8:21:00AM". 654 655 :param fuzzy_with_tokens: 656 If ``True``, ``fuzzy`` is automatically set to True, and the parser 657 will return a tuple where the first element is the parsed 658 :class:`datetime.datetime` datetimestamp and the second element is 659 a tuple containing the portions of the string which were ignored: 660 661 .. doctest:: 662 663 >>> from dateutil.parser import parse 664 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) 665 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 666 667 """ 668 if fuzzy_with_tokens: 669 fuzzy = True 670 671 info = self.info 672 673 if dayfirst is None: 674 dayfirst = info.dayfirst 675 676 if yearfirst is None: 677 yearfirst = info.yearfirst 678 679 res = self._result() 680 l = _timelex.split(timestr) # Splits the timestr into tokens 681 682 skipped_idxs = [] 683 684 # year/month/day list 685 ymd = _ymd() 686 687 len_l = len(l) 688 i = 0 689 try: 690 while i < len_l: 691 692 # Check if it's a number 693 value_repr = l[i] 694 try: 695 value = float(value_repr) 696 except ValueError: 697 value = None 698 699 if value is not None: 700 # Numeric token 701 i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy) 702 703 # Check weekday 704 elif info.weekday(l[i]) is not None: 705 value = info.weekday(l[i]) 706 res.weekday = value 707 708 # Check month name 709 elif info.month(l[i]) is not None: 710 value = info.month(l[i]) 711 ymd.append(value, 'M') 712 713 if i + 1 < len_l: 714 if l[i + 1] in ('-', '/'): 715 # Jan-01[-99] 716 sep = l[i + 1] 717 ymd.append(l[i + 2]) 718 719 if i + 3 < len_l and l[i + 3] == sep: 720 # Jan-01-99 721 ymd.append(l[i + 4]) 722 i += 2 723 724 i += 2 725 726 elif (i + 4 < len_l and l[i + 1] == l[i + 3] 
== ' ' and 727 info.pertain(l[i + 2])): 728 # Jan of 01 729 # In this case, 01 is clearly year 730 if l[i + 4].isdigit(): 731 # Convert it here to become unambiguous 732 value = int(l[i + 4]) 733 year = str(info.convertyear(value)) 734 ymd.append(year, 'Y') 735 else: 736 # Wrong guess 737 pass 738 # TODO: not hit in tests 739 i += 4 740 741 # Check am/pm 742 elif info.ampm(l[i]) is not None: 743 value = info.ampm(l[i]) 744 val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy) 745 746 if val_is_ampm: 747 res.hour = self._adjust_ampm(res.hour, value) 748 res.ampm = value 749 750 elif fuzzy: 751 skipped_idxs.append(i) 752 753 # Check for a timezone name 754 elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]): 755 res.tzname = l[i] 756 res.tzoffset = info.tzoffset(res.tzname) 757 758 # Check for something like GMT+3, or BRST+3. Notice 759 # that it doesn't mean "I am 3 hours after GMT", but 760 # "my time +3 is GMT". If found, we reverse the 761 # logic so that timezone parsing code will get it 762 # right. 763 if i + 1 < len_l and l[i + 1] in ('+', '-'): 764 l[i + 1] = ('+', '-')[l[i + 1] == '+'] 765 res.tzoffset = None 766 if info.utczone(res.tzname): 767 # With something like GMT+3, the timezone 768 # is *not* GMT. 769 res.tzname = None 770 771 # Check for a numbered timezone 772 elif res.hour is not None and l[i] in ('+', '-'): 773 signal = (-1, 1)[l[i] == '+'] 774 len_li = len(l[i + 1]) 775 776 # TODO: check that l[i + 1] is integer? 777 if len_li == 4: 778 # -0300 779 hour_offset = int(l[i + 1][:2]) 780 min_offset = int(l[i + 1][2:]) 781 elif i + 2 < len_l and l[i + 2] == ':': 782 # -03:00 783 hour_offset = int(l[i + 1]) 784 min_offset = int(l[i + 3]) # TODO: Check that l[i+3] is minute-like? 
785 i += 2 786 elif len_li <= 2: 787 # -[0]3 788 hour_offset = int(l[i + 1][:2]) 789 min_offset = 0 790 else: 791 raise ValueError(timestr) 792 793 res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60) 794 795 # Look for a timezone name between parenthesis 796 if (i + 5 < len_l and 797 info.jump(l[i + 2]) and l[i + 3] == '(' and 798 l[i + 5] == ')' and 799 3 <= len(l[i + 4]) and 800 self._could_be_tzname(res.hour, res.tzname, 801 None, l[i + 4])): 802 # -0300 (BRST) 803 res.tzname = l[i + 4] 804 i += 4 805 806 i += 1 807 808 # Check jumps 809 elif not (info.jump(l[i]) or fuzzy): 810 raise ValueError(timestr) 811 812 else: 813 skipped_idxs.append(i) 814 i += 1 815 816 # Process year/month/day 817 year, month, day = ymd.resolve_ymd(yearfirst, dayfirst) 818 819 res.century_specified = ymd.century_specified 820 res.year = year 821 res.month = month 822 res.day = day 823 824 except (IndexError, ValueError): 825 return None, None 826 827 if not info.validate(res): 828 return None, None 829 830 if fuzzy_with_tokens: 831 skipped_tokens = self._recombine_skipped(l, skipped_idxs) 832 return res, tuple(skipped_tokens) 833 else: 834 return res, None 835 836 def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy): 837 # Token is a number 838 value_repr = tokens[idx] 839 try: 840 value = self._to_decimal(value_repr) 841 except Exception as e: 842 six.raise_from(ValueError('Unknown numeric token'), e) 843 844 len_li = len(value_repr) 845 846 len_l = len(tokens) 847 848 if (len(ymd) == 3 and len_li in (2, 4) and 849 res.hour is None and 850 (idx + 1 >= len_l or 851 (tokens[idx + 1] != ':' and 852 info.hms(tokens[idx + 1]) is None))): 853 # 19990101T23[59] 854 s = tokens[idx] 855 res.hour = int(s[:2]) 856 857 if len_li == 4: 858 res.minute = int(s[2:]) 859 860 elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6): 861 # YYMMDD or HHMMSS[.ss] 862 s = tokens[idx] 863 864 if not ymd and '.' 
not in tokens[idx]: 865 ymd.append(s[:2]) 866 ymd.append(s[2:4]) 867 ymd.append(s[4:]) 868 else: 869 # 19990101T235959[.59] 870 871 # TODO: Check if res attributes already set. 872 res.hour = int(s[:2]) 873 res.minute = int(s[2:4]) 874 res.second, res.microsecond = self._parsems(s[4:]) 875 876 elif len_li in (8, 12, 14): 877 # YYYYMMDD 878 s = tokens[idx] 879 ymd.append(s[:4], 'Y') 880 ymd.append(s[4:6]) 881 ymd.append(s[6:8]) 882 883 if len_li > 8: 884 res.hour = int(s[8:10]) 885 res.minute = int(s[10:12]) 886 887 if len_li > 12: 888 res.second = int(s[12:]) 889 890 elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None: 891 # HH[ ]h or MM[ ]m or SS[.ss][ ]s 892 hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True) 893 (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx) 894 if hms is not None: 895 # TODO: checking that hour/minute/second are not 896 # already set? 897 self._assign_hms(res, value_repr, hms) 898 899 elif idx + 2 < len_l and tokens[idx + 1] == ':': 900 # HH:MM[:SS[.ss]] 901 res.hour = int(value) 902 value = self._to_decimal(tokens[idx + 2]) # TODO: try/except for this? 
903 (res.minute, res.second) = self._parse_min_sec(value) 904 905 if idx + 4 < len_l and tokens[idx + 3] == ':': 906 res.second, res.microsecond = self._parsems(tokens[idx + 4]) 907 908 idx += 2 909 910 idx += 2 911 912 elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'): 913 sep = tokens[idx + 1] 914 ymd.append(value_repr) 915 916 if idx + 2 < len_l and not info.jump(tokens[idx + 2]): 917 if tokens[idx + 2].isdigit(): 918 # 01-01[-01] 919 ymd.append(tokens[idx + 2]) 920 else: 921 # 01-Jan[-01] 922 value = info.month(tokens[idx + 2]) 923 924 if value is not None: 925 ymd.append(value, 'M') 926 else: 927 raise ValueError() 928 929 if idx + 3 < len_l and tokens[idx + 3] == sep: 930 # We have three members 931 value = info.month(tokens[idx + 4]) 932 933 if value is not None: 934 ymd.append(value, 'M') 935 else: 936 ymd.append(tokens[idx + 4]) 937 idx += 2 938 939 idx += 1 940 idx += 1 941 942 elif idx + 1 >= len_l or info.jump(tokens[idx + 1]): 943 if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None: 944 # 12 am 945 hour = int(value) 946 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2])) 947 idx += 1 948 else: 949 # Year, month or day 950 ymd.append(value) 951 idx += 1 952 953 elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24): 954 # 12am 955 hour = int(value) 956 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1])) 957 idx += 1 958 959 elif ymd.could_be_day(value): 960 ymd.append(value) 961 962 elif not fuzzy: 963 raise ValueError() 964 965 return idx 966 967 def _find_hms_idx(self, idx, tokens, info, allow_jump): 968 len_l = len(tokens) 969 970 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: 971 # There is an "h", "m", or "s" label following this token. We take 972 # assign the upcoming label to the current token. 973 # e.g. 
the "12" in 12h" 974 hms_idx = idx + 1 975 976 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and 977 info.hms(tokens[idx+2]) is not None): 978 # There is a space and then an "h", "m", or "s" label. 979 # e.g. the "12" in "12 h" 980 hms_idx = idx + 2 981 982 elif idx > 0 and info.hms(tokens[idx-1]) is not None: 983 # There is a "h", "m", or "s" preceeding this token. Since neither 984 # of the previous cases was hit, there is no label following this 985 # token, so we use the previous label. 986 # e.g. the "04" in "12h04" 987 hms_idx = idx-1 988 989 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and 990 info.hms(tokens[idx-2]) is not None): 991 # If we are looking at the final token, we allow for a 992 # backward-looking check to skip over a space. 993 # TODO: Are we sure this is the right condition here? 994 hms_idx = idx - 2 995 996 else: 997 hms_idx = None 998 999 return hms_idx 1000 1001 def _assign_hms(self, res, value_repr, hms): 1002 # See GH issue #427, fixing float rounding 1003 value = self._to_decimal(value_repr) 1004 1005 if hms == 0: 1006 # Hour 1007 res.hour = int(value) 1008 if value % 1: 1009 res.minute = int(60*(value % 1)) 1010 1011 elif hms == 1: 1012 (res.minute, res.second) = self._parse_min_sec(value) 1013 1014 elif hms == 2: 1015 (res.second, res.microsecond) = self._parsems(value_repr) 1016 1017 def _could_be_tzname(self, hour, tzname, tzoffset, token): 1018 return (hour is not None and 1019 tzname is None and 1020 tzoffset is None and 1021 len(token) <= 5 and 1022 all(x in string.ascii_uppercase for x in token)) 1023 1024 def _ampm_valid(self, hour, ampm, fuzzy): 1025 """ 1026 For fuzzy parsing, 'a' or 'am' (both valid English words) 1027 may erroneously trigger the AM/PM flag. Deal with that 1028 here. 1029 """ 1030 val_is_ampm = True 1031 1032 # If there's already an AM/PM flag, this one isn't one. 
1033 if fuzzy and ampm is not None: 1034 val_is_ampm = False 1035 1036 # If AM/PM is found and hour is not, raise a ValueError 1037 if hour is None: 1038 if fuzzy: 1039 val_is_ampm = False 1040 else: 1041 raise ValueError('No hour specified with AM or PM flag.') 1042 elif not 0 <= hour <= 12: 1043 # If AM/PM is found, it's a 12 hour clock, so raise 1044 # an error for invalid range 1045 if fuzzy: 1046 val_is_ampm = False 1047 else: 1048 raise ValueError('Invalid hour specified for 12-hour clock.') 1049 1050 return val_is_ampm 1051 1052 def _adjust_ampm(self, hour, ampm): 1053 if hour < 12 and ampm == 1: 1054 hour += 12 1055 elif hour == 12 and ampm == 0: 1056 hour = 0 1057 return hour 1058 1059 def _parse_min_sec(self, value): 1060 # TODO: Every usage of this function sets res.second to the return 1061 # value. Are there any cases where second will be returned as None and 1062 # we *dont* want to set res.second = None? 1063 minute = int(value) 1064 second = None 1065 1066 sec_remainder = value % 1 1067 if sec_remainder: 1068 second = int(60 * sec_remainder) 1069 return (minute, second) 1070 1071 def _parsems(self, value): 1072 """Parse a I[.F] seconds value into (seconds, microseconds).""" 1073 if "." not in value: 1074 return int(value), 0 1075 else: 1076 i, f = value.split(".") 1077 return int(i), int(f.ljust(6, "0")[:6]) 1078 1079 def _parse_hms(self, idx, tokens, info, hms_idx): 1080 # TODO: Is this going to admit a lot of false-positives for when we 1081 # just happen to have digits and "h", "m" or "s" characters in non-date 1082 # text? I guess hex hashes won't have that problem, but there's plenty 1083 # of random junk out there. 1084 if hms_idx is None: 1085 hms = None 1086 new_idx = idx 1087 elif hms_idx > idx: 1088 hms = info.hms(tokens[hms_idx]) 1089 new_idx = hms_idx 1090 else: 1091 # Looking backwards, increment one. 
1092 hms = info.hms(tokens[hms_idx]) + 1 1093 new_idx = idx 1094 1095 return (new_idx, hms) 1096 1097 def _recombine_skipped(self, tokens, skipped_idxs): 1098 """ 1099 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] 1100 >>> skipped_idxs = [0, 1, 2, 5] 1101 >>> _recombine_skipped(tokens, skipped_idxs) 1102 ["foo bar", "baz"] 1103 """ 1104 skipped_tokens = [] 1105 for i, idx in enumerate(sorted(skipped_idxs)): 1106 if i > 0 and idx - 1 == skipped_idxs[i - 1]: 1107 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] 1108 else: 1109 skipped_tokens.append(tokens[idx]) 1110 1111 return skipped_tokens 1112 1113 def _build_tzinfo(self, tzinfos, tzname, tzoffset): 1114 if callable(tzinfos): 1115 tzdata = tzinfos(tzname, tzoffset) 1116 else: 1117 tzdata = tzinfos.get(tzname) 1118 1119 if isinstance(tzdata, datetime.tzinfo): 1120 tzinfo = tzdata 1121 elif isinstance(tzdata, text_type): 1122 tzinfo = tz.tzstr(tzdata) 1123 elif isinstance(tzdata, integer_types): 1124 tzinfo = tz.tzoffset(tzname, tzdata) 1125 else: 1126 raise ValueError("Offset must be tzinfo subclass, " 1127 "tz string, or int offset.") 1128 return tzinfo 1129 1130 def _build_tzaware(self, naive, res, tzinfos): 1131 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): 1132 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) 1133 aware = naive.replace(tzinfo=tzinfo) 1134 aware = self._assign_tzname(aware, res.tzname) 1135 1136 elif res.tzname and res.tzname in time.tzname: 1137 aware = naive.replace(tzinfo=tz.tzlocal()) 1138 1139 # Handle ambiguous local datetime 1140 aware = self._assign_tzname(aware, res.tzname) 1141 1142 # This is mostly relevant for winter GMT zones parsed in the UK 1143 if (aware.tzname() != res.tzname and 1144 res.tzname in self.info.UTCZONE): 1145 aware = aware.replace(tzinfo=tz.tzutc()) 1146 1147 elif res.tzoffset == 0: 1148 aware = naive.replace(tzinfo=tz.tzutc()) 1149 1150 elif res.tzoffset: 1151 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, 
res.tzoffset)) 1152 1153 elif not res.tzname and not res.tzoffset: 1154 # i.e. no timezone information was found. 1155 aware = naive 1156 1157 elif res.tzname: 1158 # tz-like string was parsed but we don't know what to do 1159 # with it 1160 warnings.warn("tzname {tzname} identified but not understood. " 1161 "Pass `tzinfos` argument in order to correctly " 1162 "return a timezone-aware datetime. In a future " 1163 "version, this raise an " 1164 "exception.".format(tzname=res.tzname), 1165 category=UnknownTimezoneWarning) 1166 aware = naive 1167 1168 return aware 1169 1170 def _build_naive(self, res, default): 1171 repl = {} 1172 for attr in ("year", "month", "day", "hour", 1173 "minute", "second", "microsecond"): 1174 value = getattr(res, attr) 1175 if value is not None: 1176 repl[attr] = value 1177 1178 if 'day' not in repl: 1179 # If the default day exceeds the last day of the month, fall back 1180 # to the end of the month. 1181 cyear = default.year if res.year is None else res.year 1182 cmonth = default.month if res.month is None else res.month 1183 cday = default.day if res.day is None else res.day 1184 1185 if cday > monthrange(cyear, cmonth)[1]: 1186 repl['day'] = monthrange(cyear, cmonth)[1] 1187 1188 naive = default.replace(**repl) 1189 1190 if res.weekday is not None and not res.day: 1191 naive = naive + relativedelta.relativedelta(weekday=res.weekday) 1192 1193 return naive 1194 1195 def _assign_tzname(self, dt, tzname): 1196 if dt.tzname() != tzname: 1197 new_dt = tz.enfold(dt, fold=1) 1198 if new_dt.tzname() == tzname: 1199 return new_dt 1200 1201 return dt 1202 1203 def _to_decimal(self, val): 1204 try: 1205 return Decimal(val) 1206 except Exception as e: 1207 msg = "Could not convert %s to decimal" % val 1208 six.raise_from(ValueError(msg), e) 1209 1210 1211DEFAULTPARSER = parser() 1212 1213 1214def parse(timestr, parserinfo=None, **kwargs): 1215 """ 1216 1217 Parse a string in one of the supported formats, using the 1218 ``parserinfo`` parameters. 
1219 1220 :param timestr: 1221 A string containing a date/time stamp. 1222 1223 :param parserinfo: 1224 A :class:`parserinfo` object containing parameters for the parser. 1225 If ``None``, the default arguments to the :class:`parserinfo` 1226 constructor are used. 1227 1228 The ``**kwargs`` parameter takes the following keyword arguments: 1229 1230 :param default: 1231 The default datetime object, if this is a datetime object and not 1232 ``None``, elements specified in ``timestr`` replace elements in the 1233 default object. 1234 1235 :param ignoretz: 1236 If set ``True``, time zones in parsed strings are ignored and a naive 1237 :class:`datetime` object is returned. 1238 1239 :param tzinfos: 1240 Additional time zone names / aliases which may be present in the 1241 string. This argument maps time zone names (and optionally offsets 1242 from those time zones) to time zones. This parameter can be a 1243 dictionary with timezone aliases mapping time zone names to time 1244 zones or a function taking two parameters (``tzname`` and 1245 ``tzoffset``) and returning a time zone. 1246 1247 The timezones to which the names are mapped can be an integer 1248 offset from UTC in seconds or a :class:`tzinfo` object. 1249 1250 .. doctest:: 1251 :options: +NORMALIZE_WHITESPACE 1252 1253 >>> from dateutil.parser import parse 1254 >>> from dateutil.tz import gettz 1255 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 1256 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 1257 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 1258 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 1259 datetime.datetime(2012, 1, 19, 17, 21, 1260 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 1261 1262 This parameter is ignored if ``ignoretz`` is set. 1263 1264 :param dayfirst: 1265 Whether to interpret the first value in an ambiguous 3-integer date 1266 (e.g. 01/05/09) as the day (``True``) or month (``False``). 
If 1267 ``yearfirst`` is set to ``True``, this distinguishes between YDM and 1268 YMD. If set to ``None``, this value is retrieved from the current 1269 :class:`parserinfo` object (which itself defaults to ``False``). 1270 1271 :param yearfirst: 1272 Whether to interpret the first value in an ambiguous 3-integer date 1273 (e.g. 01/05/09) as the year. If ``True``, the first number is taken to 1274 be the year, otherwise the last number is taken to be the year. If 1275 this is set to ``None``, the value is retrieved from the current 1276 :class:`parserinfo` object (which itself defaults to ``False``). 1277 1278 :param fuzzy: 1279 Whether to allow fuzzy parsing, allowing for string like "Today is 1280 January 1, 2047 at 8:21:00AM". 1281 1282 :param fuzzy_with_tokens: 1283 If ``True``, ``fuzzy`` is automatically set to True, and the parser 1284 will return a tuple where the first element is the parsed 1285 :class:`datetime.datetime` datetimestamp and the second element is 1286 a tuple containing the portions of the string which were ignored: 1287 1288 .. doctest:: 1289 1290 >>> from dateutil.parser import parse 1291 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) 1292 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 1293 1294 :return: 1295 Returns a :class:`datetime.datetime` object or, if the 1296 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 1297 first element being a :class:`datetime.datetime` object, the second 1298 a tuple containing the fuzzy tokens. 1299 1300 :raises ValueError: 1301 Raised for invalid or unknown string format, if the provided 1302 :class:`tzinfo` is not in a valid format, or if an invalid date 1303 would be created. 1304 1305 :raises OverflowError: 1306 Raised if the parsed date exceeds the largest valid C integer on 1307 your system. 
1308 """ 1309 if parserinfo: 1310 return parser(parserinfo).parse(timestr, **kwargs) 1311 else: 1312 return DEFAULTPARSER.parse(timestr, **kwargs) 1313 1314 1315class _tzparser(object): 1316 1317 class _result(_resultbase): 1318 1319 __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset", 1320 "start", "end"] 1321 1322 class _attr(_resultbase): 1323 __slots__ = ["month", "week", "weekday", 1324 "yday", "jyday", "day", "time"] 1325 1326 def __repr__(self): 1327 return self._repr("") 1328 1329 def __init__(self): 1330 _resultbase.__init__(self) 1331 self.start = self._attr() 1332 self.end = self._attr() 1333 1334 def parse(self, tzstr): 1335 res = self._result() 1336 l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x] 1337 used_idxs = list() 1338 try: 1339 1340 len_l = len(l) 1341 1342 i = 0 1343 while i < len_l: 1344 # BRST+3[BRDT[+2]] 1345 j = i 1346 while j < len_l and not [x for x in l[j] 1347 if x in "0123456789:,-+"]: 1348 j += 1 1349 if j != i: 1350 if not res.stdabbr: 1351 offattr = "stdoffset" 1352 res.stdabbr = "".join(l[i:j]) 1353 else: 1354 offattr = "dstoffset" 1355 res.dstabbr = "".join(l[i:j]) 1356 1357 for ii in range(j): 1358 used_idxs.append(ii) 1359 i = j 1360 if (i < len_l and (l[i] in ('+', '-') or l[i][0] in 1361 "0123456789")): 1362 if l[i] in ('+', '-'): 1363 # Yes, that's right. See the TZ variable 1364 # documentation. 
1365 signal = (1, -1)[l[i] == '+'] 1366 used_idxs.append(i) 1367 i += 1 1368 else: 1369 signal = -1 1370 len_li = len(l[i]) 1371 if len_li == 4: 1372 # -0300 1373 setattr(res, offattr, (int(l[i][:2]) * 3600 + 1374 int(l[i][2:]) * 60) * signal) 1375 elif i + 1 < len_l and l[i + 1] == ':': 1376 # -03:00 1377 setattr(res, offattr, 1378 (int(l[i]) * 3600 + 1379 int(l[i + 2]) * 60) * signal) 1380 used_idxs.append(i) 1381 i += 2 1382 elif len_li <= 2: 1383 # -[0]3 1384 setattr(res, offattr, 1385 int(l[i][:2]) * 3600 * signal) 1386 else: 1387 return None 1388 used_idxs.append(i) 1389 i += 1 1390 if res.dstabbr: 1391 break 1392 else: 1393 break 1394 1395 1396 if i < len_l: 1397 for j in range(i, len_l): 1398 if l[j] == ';': 1399 l[j] = ',' 1400 1401 assert l[i] == ',' 1402 1403 i += 1 1404 1405 if i >= len_l: 1406 pass 1407 elif (8 <= l.count(',') <= 9 and 1408 not [y for x in l[i:] if x != ',' 1409 for y in x if y not in "0123456789+-"]): 1410 # GMT0BST,3,0,30,3600,10,0,26,7200[,3600] 1411 for x in (res.start, res.end): 1412 x.month = int(l[i]) 1413 used_idxs.append(i) 1414 i += 2 1415 if l[i] == '-': 1416 value = int(l[i + 1]) * -1 1417 used_idxs.append(i) 1418 i += 1 1419 else: 1420 value = int(l[i]) 1421 used_idxs.append(i) 1422 i += 2 1423 if value: 1424 x.week = value 1425 x.weekday = (int(l[i]) - 1) % 7 1426 else: 1427 x.day = int(l[i]) 1428 used_idxs.append(i) 1429 i += 2 1430 x.time = int(l[i]) 1431 used_idxs.append(i) 1432 i += 2 1433 if i < len_l: 1434 if l[i] in ('-', '+'): 1435 signal = (-1, 1)[l[i] == "+"] 1436 used_idxs.append(i) 1437 i += 1 1438 else: 1439 signal = 1 1440 used_idxs.append(i) 1441 res.dstoffset = (res.stdoffset + int(l[i]) * signal) 1442 1443 # This was a made-up format that is not in normal use 1444 warn(('Parsed time zone "%s"' % tzstr) + 1445 'is in a non-standard dateutil-specific format, which ' + 1446 'is now deprecated; support for parsing this format ' + 1447 'will be removed in future versions. 
It is recommended ' + 1448 'that you switch to a standard format like the GNU ' + 1449 'TZ variable format.', tz.DeprecatedTzFormatWarning) 1450 elif (l.count(',') == 2 and l[i:].count('/') <= 2 and 1451 not [y for x in l[i:] if x not in (',', '/', 'J', 'M', 1452 '.', '-', ':') 1453 for y in x if y not in "0123456789"]): 1454 for x in (res.start, res.end): 1455 if l[i] == 'J': 1456 # non-leap year day (1 based) 1457 used_idxs.append(i) 1458 i += 1 1459 x.jyday = int(l[i]) 1460 elif l[i] == 'M': 1461 # month[-.]week[-.]weekday 1462 used_idxs.append(i) 1463 i += 1 1464 x.month = int(l[i]) 1465 used_idxs.append(i) 1466 i += 1 1467 assert l[i] in ('-', '.') 1468 used_idxs.append(i) 1469 i += 1 1470 x.week = int(l[i]) 1471 if x.week == 5: 1472 x.week = -1 1473 used_idxs.append(i) 1474 i += 1 1475 assert l[i] in ('-', '.') 1476 used_idxs.append(i) 1477 i += 1 1478 x.weekday = (int(l[i]) - 1) % 7 1479 else: 1480 # year day (zero based) 1481 x.yday = int(l[i]) + 1 1482 1483 used_idxs.append(i) 1484 i += 1 1485 1486 if i < len_l and l[i] == '/': 1487 used_idxs.append(i) 1488 i += 1 1489 # start time 1490 len_li = len(l[i]) 1491 if len_li == 4: 1492 # -0300 1493 x.time = (int(l[i][:2]) * 3600 + 1494 int(l[i][2:]) * 60) 1495 elif i + 1 < len_l and l[i + 1] == ':': 1496 # -03:00 1497 x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60 1498 used_idxs.append(i) 1499 i += 2 1500 if i + 1 < len_l and l[i + 1] == ':': 1501 used_idxs.append(i) 1502 i += 2 1503 x.time += int(l[i]) 1504 elif len_li <= 2: 1505 # -[0]3 1506 x.time = (int(l[i][:2]) * 3600) 1507 else: 1508 return None 1509 used_idxs.append(i) 1510 i += 1 1511 1512 assert i == len_l or l[i] == ',' 1513 1514 i += 1 1515 1516 assert i >= len_l 1517 1518 except (IndexError, ValueError, AssertionError): 1519 return None 1520 1521 unused_idxs = set(range(len_l)).difference(used_idxs) 1522 res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"}) 1523 return res 1524 1525 1526DEFAULTTZPARSER = _tzparser() 1527 1528 
def _parsetz(tzstr):
    """Parse ``tzstr`` with the shared module-level time zone parser.

    :param tzstr:
        The time zone string to parse.

    :return:
        Whatever ``DEFAULTTZPARSER.parse`` returns for ``tzstr``.
    """
    parsed = DEFAULTTZPARSER.parse(tzstr)
    return parsed


class UnknownTimezoneWarning(RuntimeWarning):
    """Warning category for a time zone that was found in the input but
    could not be turned into a tzinfo."""
# vim:ts=4:sw=4:et