• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2"""
3This module offers a generic date/time string parser which is able to parse
4most known formats to represent a date and/or time.
5
6This module attempts to be forgiving with regards to unlikely input formats,
7returning a datetime object even for dates which are ambiguous. If an element
8of a date/time stamp is omitted, the following rules are applied:
9
10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
11  on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
12  specified.
13- If a time zone is omitted, a timezone-naive datetime is returned.
14
15If any other elements are missing, they are taken from the
16:class:`datetime.datetime` object passed to the parameter ``default``. If this
17results in a day number exceeding the valid number of days per month, the
18value falls back to the end of the month.
19
20Additional resources about date/time string formats can be found below:
21
22- `A summary of the international standard date and time notation
23  <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
24- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
26- `CPAN ParseDate module
27  <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
28- `Java SimpleDateFormat Class
29  <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
30"""
31from __future__ import unicode_literals
32
33import datetime
34import re
35import string
36import time
37import warnings
38
39from calendar import monthrange
40from io import StringIO
41
42import six
43from six import binary_type, integer_types, text_type
44
45from decimal import Decimal
46
47from warnings import warn
48
49from .. import relativedelta
50from .. import tz
51
52__all__ = ["parse", "parserinfo"]
53
54
55# TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
56# making public and/or figuring out if there is something we can
57# take off their plate.
class _timelex(object):
    """
    Lexer that cuts a date/time string (or character stream) into tokens.

    Instances are iterators: each step yields the next token, and iteration
    stops once the underlying stream is exhausted.
    """
    # Fractional seconds are sometimes split by a comma
    _split_decimal = re.compile("([.,])")

    def __init__(self, instream):
        # Bytes-like input is decoded first so the lexer only handles text.
        if six.PY2:
            # Python 2's unicode type also has a 'decode' method, so duck
            # typing would decode twice; check the concrete types instead.
            if isinstance(instream, (binary_type, bytearray)):
                instream = instream.decode()
        elif getattr(instream, 'decode', None) is not None:
            instream = instream.decode()

        # Text is wrapped in a stream; anything else must already be readable.
        if isinstance(instream, text_type):
            instream = StringIO(instream)
        elif getattr(instream, 'read', None) is None:
            raise TypeError('Parser must be a string or character stream, not '
                            '{itype}'.format(itype=instream.__class__.__name__))

        self.instream = instream
        self.charstack = []     # characters read but not yet consumed
        self.tokenstack = []    # tokens produced but not yet returned
        self.eof = False

    def get_token(self):
        """
        Return the next lexical unit of the time string, or ``None`` once the
        input is exhausted.

        A token is a run of letters, a run of digits, a single whitespace
        marker (always reported as ``' '``) or any other single character.

        Dots are ambiguous: they may separate fields ("Sep.20.2009") or mark
        a decimal fraction ("4:30:21.447"). A dot-joined run is therefore read
        in full before deciding how to cut it, and any extra pieces are queued
        on ``tokenstack`` to be handed out by subsequent calls.
        """
        if self.tokenstack:
            return self.tokenstack.pop(0)

        saw_letters = False
        token = None
        state = None

        while not self.eof:
            # A token only ends when a character that does not belong to it
            # shows up; that character is parked on charstack for next time.
            if self.charstack:
                ch = self.charstack.pop(0)
            else:
                ch = self.instream.read(1)
                # NUL characters are dropped silently.
                while ch == '\x00':
                    ch = self.instream.read(1)

            if not ch:
                self.eof = True
                break

            if not state:
                # The first character decides which kind of token this is.
                token = ch
                if self.isword(ch):
                    state = 'a'
                elif self.isnum(ch):
                    state = '0'
                elif self.isspace(ch):
                    token = ' '
                    break  # emit token
                else:
                    break  # emit token
            elif state == 'a':
                # Inside a word: keep consuming letters.
                saw_letters = True
                if self.isword(ch):
                    token += ch
                elif ch == '.':
                    token += ch
                    state = 'a.'
                else:
                    self.charstack.append(ch)
                    break  # emit token
            elif state == '0':
                # Inside a number: keep consuming digits. A comma counts as
                # a decimal mark only after at least two characters.
                if self.isnum(ch):
                    token += ch
                elif ch == '.' or (ch == ',' and len(token) >= 2):
                    token += ch
                    state = '0.'
                else:
                    self.charstack.append(ch)
                    break  # emit token
            elif state == 'a.':
                # Letters followed by a dot: keep collecting, cut up later.
                saw_letters = True
                if ch == '.' or self.isword(ch):
                    token += ch
                elif self.isnum(ch) and token[-1] == '.':
                    token += ch
                    state = '0.'
                else:
                    self.charstack.append(ch)
                    break  # emit token
            elif state == '0.':
                # Digits followed by a dot: keep collecting, cut up later.
                if ch == '.' or self.isnum(ch):
                    token += ch
                elif self.isword(ch) and token[-1] == '.':
                    token += ch
                    state = 'a.'
                else:
                    self.charstack.append(ch)
                    break  # emit token

        dotted = state in ('a.', '0.')
        if dotted and (saw_letters or token.count('.') > 1 or
                       token[-1] in '.,'):
            # Not a plain decimal number: split on the separators, return the
            # first piece now and queue the non-empty remainder.
            pieces = self._split_decimal.split(token)
            token = pieces[0]
            self.tokenstack.extend(piece for piece in pieces[1:] if piece)

        if state == '0.' and '.' not in token:
            # A decimal comma is normalised to a decimal point.
            token = token.replace(',', '.')

        return token

    def __iter__(self):
        return self

    def __next__(self):
        tok = self.get_token()
        if tok is None:
            raise StopIteration
        return tok

    def next(self):
        return self.__next__()  # Python 2.x support

    @classmethod
    def split(cls, s):
        """ Tokenize ``s`` completely and return the tokens as a list """
        lexer = cls(s)
        return list(lexer)

    @classmethod
    def isword(cls, nextchar):
        """ Whether or not the next character is part of a word """
        return nextchar.isalpha()

    @classmethod
    def isnum(cls, nextchar):
        """ Whether the next character is part of a number """
        return nextchar.isdigit()

    @classmethod
    def isspace(cls, nextchar):
        """ Whether the next character is whitespace """
        return nextchar.isspace()
223
224
225class _resultbase(object):
226
227    def __init__(self):
228        for attr in self.__slots__:
229            setattr(self, attr, None)
230
231    def _repr(self, classname):
232        l = []
233        for attr in self.__slots__:
234            value = getattr(self, attr)
235            if value is not None:
236                l.append("%s=%s" % (attr, repr(value)))
237        return "%s(%s)" % (classname, ", ".join(l))
238
239    def __len__(self):
240        return (sum(getattr(self, attr) is not None
241                    for attr in self.__slots__))
242
243    def __repr__(self):
244        return self._repr(self.__class__.__name__)
245
246
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Current local year and the first year of its century; used by
        # convertyear() to expand two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lower-cased word: index}`` mapping from ``lst``.

        Each entry of ``lst`` is either a single string or a tuple of
        aliases; every alias of an entry maps to that entry's position.
        """
        dct = {}
        for i, entry in enumerate(lst):
            if isinstance(entry, tuple):
                for alias in entry:
                    dct[alias.lower()] = i
            else:
                dct[entry.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``JUMP`` """
        return name.lower() in self._jump

    def weekday(self, name):
        """
        Return the 0-based index of ``name`` in ``WEEKDAYS``, or ``None`` if
        it is not a known weekday name (case-insensitive).
        """
        return self._weekdays.get(name.lower())

    def month(self, name):
        """
        Return the 1-based month number for ``name``, or ``None`` if it is
        not a known month name (case-insensitive).
        """
        idx = self._months.get(name.lower())
        if idx is None:
            return None
        return idx + 1

    def hms(self, name):
        """
        Return the index of ``name`` in ``HMS`` (0, 1 or 2 for the default
        hour/minute/second table), or ``None`` if it is not listed.
        """
        return self._hms.get(name.lower())

    def ampm(self, name):
        """
        Return the index of ``name`` in ``AMPM`` (0 for AM and 1 for PM in
        the default table), or ``None`` if it is not listed.
        """
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``PERTAIN`` """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``UTCZONE`` """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset registered for the time zone ``name``.

        Names listed in ``UTCZONE`` give ``0``; other names are looked up
        verbatim in ``TZOFFSET`` and yield ``None`` when absent.
        """
        # The keys of _utczone are lower-cased by _convert(), so the lookup
        # has to be case-insensitive as well; a plain ``name in
        # self._utczone`` never matched "UTC", "GMT" or "Z".
        if self.utczone(name):
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Expand a two-digit ``year`` to a full year near the current one.

        Years of 100 or more, and any year when ``century_specified`` is
        true, are returned unchanged. Otherwise the current century is added
        and, if the result is 50 or more years away from the current year,
        it is moved by one century towards the current year.
        """
        if year < 100 and not century_specified:
            year += self._century
            if abs(year - self._year) >= 50:
                if year < self._year:
                    year += 100
                else:
                    year -= 100
        return year

    def validate(self, res):
        """
        Normalise the year and time zone fields of ``res`` in place.

        Reads ``res.year``, ``res.century_specified``, ``res.tzoffset`` and
        ``res.tzname``. Always returns ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if (res.tzoffset == 0 and not res.tzname) or res.tzname == 'Z':
            # A zero offset without a name, or a literal "Z", is UTC.
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            # A UTC alias always carries a zero offset.
            res.tzoffset = 0
        return True
387
388
389class _ymd(list):
390    def __init__(self, *args, **kwargs):
391        super(self.__class__, self).__init__(*args, **kwargs)
392        self.century_specified = False
393        self.dstridx = None
394        self.mstridx = None
395        self.ystridx = None
396
397    @property
398    def has_year(self):
399        return self.ystridx is not None
400
401    @property
402    def has_month(self):
403        return self.mstridx is not None
404
405    @property
406    def has_day(self):
407        return self.dstridx is not None
408
409    def could_be_day(self, value):
410        if self.has_day:
411            return False
412        elif not self.has_month:
413            return 1 <= value <= 31
414        elif not self.has_year:
415            # Be permissive, assume leapyear
416            month = self[self.mstridx]
417            return 1 <= value <= monthrange(2000, month)[1]
418        else:
419            month = self[self.mstridx]
420            year = self[self.ystridx]
421            return 1 <= value <= monthrange(year, month)[1]
422
423    def append(self, val, label=None):
424        if hasattr(val, '__len__'):
425            if val.isdigit() and len(val) > 2:
426                self.century_specified = True
427                if label not in [None, 'Y']:  # pragma: no cover
428                    raise ValueError(label)
429                label = 'Y'
430        elif val > 100:
431            self.century_specified = True
432            if label not in [None, 'Y']:  # pragma: no cover
433                raise ValueError(label)
434            label = 'Y'
435
436        super(self.__class__, self).append(int(val))
437
438        if label == 'M':
439            if self.has_month:
440                raise ValueError('Month is already set')
441            self.mstridx = len(self) - 1
442        elif label == 'D':
443            if self.has_day:
444                raise ValueError('Day is already set')
445            self.dstridx = len(self) - 1
446        elif label == 'Y':
447            if self.has_year:
448                raise ValueError('Year is already set')
449            self.ystridx = len(self) - 1
450
451    def resolve_ymd(self, yearfirst, dayfirst):
452        len_ymd = len(self)
453        year, month, day = (None, None, None)
454
455        mstridx = self.mstridx
456
457        if len_ymd > 3:
458            raise ValueError("More than three YMD values")
459        elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
460            # One member, or two members with a month string
461            if mstridx is not None:
462                month = self[mstridx]
463                del self[mstridx]
464
465            if len_ymd > 1 or mstridx is None:
466                if self[0] > 31:
467                    year = self[0]
468                else:
469                    day = self[0]
470
471        elif len_ymd == 2:
472            # Two members with numbers
473            if self[0] > 31:
474                # 99-01
475                year, month = self
476            elif self[1] > 31:
477                # 01-99
478                month, year = self
479            elif dayfirst and self[1] <= 12:
480                # 13-01
481                day, month = self
482            else:
483                # 01-13
484                month, day = self
485
486        elif len_ymd == 3:
487            # Three members
488            if mstridx == 0:
489                if self[1] > 31:
490                    # Apr-2003-25
491                    month, year, day = self
492                else:
493                    month, day, year = self
494            elif mstridx == 1:
495                if self[0] > 31 or (yearfirst and self[2] <= 31):
496                    # 99-Jan-01
497                    year, month, day = self
498                else:
499                    # 01-Jan-01
500                    # Give precendence to day-first, since
501                    # two-digit years is usually hand-written.
502                    day, month, year = self
503
504            elif mstridx == 2:
505                # WTF!?
506                if self[1] > 31:
507                    # 01-99-Jan
508                    day, year, month = self
509                else:
510                    # 99-01-Jan
511                    year, day, month = self
512
513            else:
514                if (self[0] > 31 or
515                    self.ystridx == 0 or
516                        (yearfirst and self[1] <= 12 and self[2] <= 31)):
517                    # 99-01-01
518                    if dayfirst and self[2] <= 12:
519                        year, day, month = self
520                    else:
521                        year, month, day = self
522                elif self[0] > 12 or (dayfirst and self[1] <= 12):
523                    # 13-01-01
524                    day, month, year = self
525                else:
526                    # 01-13-01
527                    month, day, year = self
528
529        return year, month, day
530
531
532class parser(object):
533    def __init__(self, info=None):
534        self.info = info or parserinfo()
535
536    def parse(self, timestr, default=None,
537              ignoretz=False, tzinfos=None, **kwargs):
538        """
539        Parse the date/time string into a :class:`datetime.datetime` object.
540
541        :param timestr:
542            Any date/time string using the supported formats.
543
544        :param default:
545            The default datetime object, if this is a datetime object and not
546            ``None``, elements specified in ``timestr`` replace elements in the
547            default object.
548
549        :param ignoretz:
550            If set ``True``, time zones in parsed strings are ignored and a
551            naive :class:`datetime.datetime` object is returned.
552
553        :param tzinfos:
554            Additional time zone names / aliases which may be present in the
555            string. This argument maps time zone names (and optionally offsets
556            from those time zones) to time zones. This parameter can be a
557            dictionary with timezone aliases mapping time zone names to time
558            zones or a function taking two parameters (``tzname`` and
559            ``tzoffset``) and returning a time zone.
560
561            The timezones to which the names are mapped can be an integer
562            offset from UTC in seconds or a :class:`tzinfo` object.
563
564            .. doctest::
565               :options: +NORMALIZE_WHITESPACE
566
567                >>> from dateutil.parser import parse
568                >>> from dateutil.tz import gettz
569                >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
570                >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
571                datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
572                >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
573                datetime.datetime(2012, 1, 19, 17, 21,
574                                  tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
575
576            This parameter is ignored if ``ignoretz`` is set.
577
578        :param \\*\\*kwargs:
579            Keyword arguments as passed to ``_parse()``.
580
581        :return:
582            Returns a :class:`datetime.datetime` object or, if the
583            ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
584            first element being a :class:`datetime.datetime` object, the second
585            a tuple containing the fuzzy tokens.
586
587        :raises ValueError:
588            Raised for invalid or unknown string format, if the provided
589            :class:`tzinfo` is not in a valid format, or if an invalid date
590            would be created.
591
592        :raises TypeError:
593            Raised for non-string or character stream input.
594
595        :raises OverflowError:
596            Raised if the parsed date exceeds the largest valid C integer on
597            your system.
598        """
599
600        if default is None:
601            default = datetime.datetime.now().replace(hour=0, minute=0,
602                                                      second=0, microsecond=0)
603
604        res, skipped_tokens = self._parse(timestr, **kwargs)
605
606        if res is None:
607            raise ValueError("Unknown string format:", timestr)
608
609        if len(res) == 0:
610            raise ValueError("String does not contain a date:", timestr)
611
612        ret = self._build_naive(res, default)
613
614        if not ignoretz:
615            ret = self._build_tzaware(ret, res, tzinfos)
616
617        if kwargs.get('fuzzy_with_tokens', False):
618            return ret, skipped_tokens
619        else:
620            return ret
621
622    class _result(_resultbase):
623        __slots__ = ["year", "month", "day", "weekday",
624                     "hour", "minute", "second", "microsecond",
625                     "tzname", "tzoffset", "ampm","any_unused_tokens"]
626
    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
               fuzzy_with_tokens=False):
        """
        Private method which performs the heavy lifting of parsing, called from
        ``parse()``, which passes on its ``kwargs`` to this function.

        :param timestr:
            The string to parse.

        :param dayfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
            ``yearfirst`` is set to ``True``, this distinguishes between YDM
            and YMD. If set to ``None``, this value is retrieved from the
            current :class:`parserinfo` object (which itself defaults to
            ``False``).

        :param yearfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
            to be the year, otherwise the last number is taken to be the year.
            If this is set to ``None``, the value is retrieved from the current
            :class:`parserinfo` object (which itself defaults to ``False``).

        :param fuzzy:
            Whether to allow fuzzy parsing, allowing for string like "Today is
            January 1, 2047 at 8:21:00AM".

        :param fuzzy_with_tokens:
            If ``True``, ``fuzzy`` is automatically set to True, and the parser
            will return a tuple where the first element is the parsed
            :class:`datetime.datetime` datetimestamp and the second element is
            a tuple containing the portions of the string which were ignored:

            .. doctest::

                >>> from dateutil.parser import parse
                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

        :return:
            A 2-tuple ``(res, skipped_tokens)``. ``res`` is the ``_result``
            object holding the parsed fields. ``skipped_tokens`` is a tuple
            of the ignored portions of the string when ``fuzzy_with_tokens``
            is ``True``, and ``None`` otherwise. ``(None, None)`` is returned
            when an ``IndexError`` or ``ValueError`` is raised while the
            tokens are processed, or when ``info.validate(res)`` is falsy.
        """
        if fuzzy_with_tokens:
            fuzzy = True

        info = self.info

        if dayfirst is None:
            dayfirst = info.dayfirst

        if yearfirst is None:
            yearfirst = info.yearfirst

        res = self._result()
        l = _timelex.split(timestr)         # Splits the timestr into tokens

        # Indices of tokens that were skipped rather than interpreted.
        skipped_idxs = []

        # year/month/day list
        ymd = _ymd()

        len_l = len(l)
        i = 0
        try:
            while i < len_l:

                # Check if it's a number
                # float() is used only as a probe: the converted value is
                # tested against None and not used otherwise.
                value_repr = l[i]
                try:
                    value = float(value_repr)
                except ValueError:
                    value = None

                if value is not None:
                    # Numeric token
                    # The helper's return value becomes the new position.
                    i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)

                # Check weekday
                elif info.weekday(l[i]) is not None:
                    value = info.weekday(l[i])
                    res.weekday = value

                # Check month name
                elif info.month(l[i]) is not None:
                    value = info.month(l[i])
                    ymd.append(value, 'M')

                    if i + 1 < len_l:
                        if l[i + 1] in ('-', '/'):
                            # Jan-01[-99]
                            sep = l[i + 1]
                            ymd.append(l[i + 2])

                            if i + 3 < len_l and l[i + 3] == sep:
                                # Jan-01-99
                                ymd.append(l[i + 4])
                                # Skip the second separator and value.
                                i += 2

                            # Skip the first separator and value.
                            i += 2

                        elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
                              info.pertain(l[i + 2])):
                            # Jan of 01
                            # In this case, 01 is clearly year
                            if l[i + 4].isdigit():
                                # Convert it here to become unambiguous
                                value = int(l[i + 4])
                                year = str(info.convertyear(value))
                                ymd.append(year, 'Y')
                            else:
                                # Wrong guess
                                pass
                                # TODO: not hit in tests
                            # Skip " of " and the token that follows it.
                            i += 4

                # Check am/pm
                elif info.ampm(l[i]) is not None:
                    value = info.ampm(l[i])
                    val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)

                    if val_is_ampm:
                        res.hour = self._adjust_ampm(res.hour, value)
                        res.ampm = value

                    elif fuzzy:
                        skipped_idxs.append(i)

                # Check for a timezone name
                elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
                    res.tzname = l[i]
                    res.tzoffset = info.tzoffset(res.tzname)

                    # Check for something like GMT+3, or BRST+3. Notice
                    # that it doesn't mean "I am 3 hours after GMT", but
                    # "my time +3 is GMT". If found, we reverse the
                    # logic so that timezone parsing code will get it
                    # right.
                    if i + 1 < len_l and l[i + 1] in ('+', '-'):
                        # Indexing the pair with a bool flips the sign token
                        # in place: '+' becomes '-' and '-' becomes '+'.
                        l[i + 1] = ('+', '-')[l[i + 1] == '+']
                        res.tzoffset = None
                        if info.utczone(res.tzname):
                            # With something like GMT+3, the timezone
                            # is *not* GMT.
                            res.tzname = None

                # Check for a numbered timezone
                elif res.hour is not None and l[i] in ('+', '-'):
                    # +1 for '+', -1 for '-'.
                    signal = (-1, 1)[l[i] == '+']
                    len_li = len(l[i + 1])

                    # TODO: check that l[i + 1] is integer?
                    if len_li == 4:
                        # -0300
                        hour_offset = int(l[i + 1][:2])
                        min_offset = int(l[i + 1][2:])
                    elif i + 2 < len_l and l[i + 2] == ':':
                        # -03:00
                        hour_offset = int(l[i + 1])
                        min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
                        # Skip the ':' and the minutes token.
                        i += 2
                    elif len_li <= 2:
                        # -[0]3
                        hour_offset = int(l[i + 1][:2])
                        min_offset = 0
                    else:
                        raise ValueError(timestr)

                    # Offset in seconds.
                    res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)

                    # Look for a timezone name between parenthesis
                    if (i + 5 < len_l and
                            info.jump(l[i + 2]) and l[i + 3] == '(' and
                            l[i + 5] == ')' and
                            3 <= len(l[i + 4]) and
                            self._could_be_tzname(res.hour, res.tzname,
                                                  None, l[i + 4])):
                        # -0300 (BRST)
                        res.tzname = l[i + 4]
                        # Skip the jump token, '(', the name and ')'.
                        i += 4

                    # Skip the hour-offset token.
                    i += 1

                # Check jumps
                elif not (info.jump(l[i]) or fuzzy):
                    # Neither a jump word nor fuzzy mode: the token is
                    # not acceptable.
                    raise ValueError(timestr)

                else:
                    skipped_idxs.append(i)
                # Advance past the token that was just handled.
                i += 1

            # Process year/month/day
            year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

            res.century_specified = ymd.century_specified
            res.year = year
            res.month = month
            res.day = day

        except (IndexError, ValueError):
            # An out-of-range token lookahead (IndexError) or a failed
            # conversion/validation (ValueError) is reported to the caller
            # as "could not parse".
            return None, None

        if not info.validate(res):
            return None, None

        if fuzzy_with_tokens:
            skipped_tokens = self._recombine_skipped(l, skipped_idxs)
            return res, tuple(skipped_tokens)
        else:
            return res, None
835
    def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
        """
        Interpret the numeric token at ``tokens[idx]`` and record what it
        represents on ``ymd`` and/or ``res``.

        The branches below are tried in order and the first match wins, so
        their ordering is significant.

        :param tokens:
            Sequence of string tokens being parsed.

        :param idx:
            Index of the numeric token to interpret.

        :param info:
            Object providing the ``hms``, ``jump``, ``month`` and ``ampm``
            lookups used to classify neighbouring tokens.

        :param ymd:
            Accumulator for year/month/day candidates; it is appended to
            and queried via ``len`` and ``could_be_day``.

        :param res:
            Result object whose ``hour``, ``minute``, ``second`` and
            ``microsecond`` attributes may be assigned.

        :param fuzzy:
            If false, a token that matches no branch raises ``ValueError``;
            if true it is silently left alone.

        :return:
            The index of the last token consumed, which is ``idx`` itself
            unless a branch also consumed following tokens.

        :raises ValueError:
            If the token cannot be converted to a decimal, if a month name
            is expected but not recognised, or if nothing matches and
            ``fuzzy`` is false.
        """
        # Token is a number
        value_repr = tokens[idx]
        try:
            value = self._to_decimal(value_repr)
        except Exception as e:
            six.raise_from(ValueError('Unknown numeric token'), e)

        # Length of the textual token (decimal point included, if any).
        len_li = len(value_repr)

        len_l = len(tokens)

        # The date is already complete (three ymd members) and no hour has
        # been seen: a bare 2- or 4-digit token that is not followed by ':'
        # or an h/m/s label is read as HH or HHMM.
        if (len(ymd) == 3 and len_li in (2, 4) and
            res.hour is None and
            (idx + 1 >= len_l or
             (tokens[idx + 1] != ':' and
              info.hms(tokens[idx + 1]) is None))):
            # 19990101T23[59]
            s = tokens[idx]
            res.hour = int(s[:2])

            if len_li == 4:
                res.minute = int(s[2:])

        elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
            # YYMMDD or HHMMSS[.ss]
            s = tokens[idx]

            # Only treated as a date when no date member has been collected
            # yet and there is no fractional part; otherwise it is a time.
            if not ymd and '.' not in tokens[idx]:
                ymd.append(s[:2])
                ymd.append(s[2:4])
                ymd.append(s[4:])
            else:
                # 19990101T235959[.59]

                # TODO: Check if res attributes already set.
                res.hour = int(s[:2])
                res.minute = int(s[2:4])
                res.second, res.microsecond = self._parsems(s[4:])

        elif len_li in (8, 12, 14):
            # YYYYMMDD, optionally followed by HHMM (12 chars) or HHMMSS
            # (14 chars) in the same token.
            s = tokens[idx]
            ymd.append(s[:4], 'Y')
            ymd.append(s[4:6])
            ymd.append(s[6:8])

            if len_li > 8:
                res.hour = int(s[8:10])
                res.minute = int(s[10:12])

                if len_li > 12:
                    res.second = int(s[12:])

        elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
            # HH[ ]h or MM[ ]m or SS[.ss][ ]s
            # NOTE: the lookup is repeated here with the same arguments as
            # in the condition above.
            hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
            (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
            if hms is not None:
                # TODO: checking that hour/minute/second are not
                # already set?
                self._assign_hms(res, value_repr, hms)

        elif idx + 2 < len_l and tokens[idx + 1] == ':':
            # HH:MM[:SS[.ss]]
            res.hour = int(value)
            value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
            (res.minute, res.second) = self._parse_min_sec(value)

            if idx + 4 < len_l and tokens[idx + 3] == ':':
                res.second, res.microsecond = self._parsems(tokens[idx + 4])

                # Skip the second ':' and the seconds token.
                idx += 2

            # Skip the first ':' and the minutes token.
            idx += 2

        elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
            # Separator-delimited date; the same separator must be used
            # before a third member for it to be consumed.
            sep = tokens[idx + 1]
            ymd.append(value_repr)

            if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
                if tokens[idx + 2].isdigit():
                    # 01-01[-01]
                    ymd.append(tokens[idx + 2])
                else:
                    # 01-Jan[-01]
                    value = info.month(tokens[idx + 2])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        raise ValueError()

                if idx + 3 < len_l and tokens[idx + 3] == sep:
                    # We have three members
                    # NOTE(review): tokens[idx + 4] is read after checking
                    # only idx + 3 < len_l, so a trailing separator raises
                    # IndexError here - confirm the caller handles it.
                    value = info.month(tokens[idx + 4])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        ymd.append(tokens[idx + 4])
                    idx += 2

                idx += 1
            idx += 1

        elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
            # Last token, or followed by a "jump" token.
            if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
                # 12 am
                hour = int(value)
                res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
                idx += 1
            else:
                # Year, month or day
                ymd.append(value)
            idx += 1

        elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
            # 12am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
            idx += 1

        elif ymd.could_be_day(value):
            ymd.append(value)

        elif not fuzzy:
            raise ValueError()

        return idx
966
967    def _find_hms_idx(self, idx, tokens, info, allow_jump):
968        len_l = len(tokens)
969
970        if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
971            # There is an "h", "m", or "s" label following this token.  We take
972            # assign the upcoming label to the current token.
973            # e.g. the "12" in 12h"
974            hms_idx = idx + 1
975
976        elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
977              info.hms(tokens[idx+2]) is not None):
978            # There is a space and then an "h", "m", or "s" label.
979            # e.g. the "12" in "12 h"
980            hms_idx = idx + 2
981
982        elif idx > 0 and info.hms(tokens[idx-1]) is not None:
983            # There is a "h", "m", or "s" preceeding this token.  Since neither
984            # of the previous cases was hit, there is no label following this
985            # token, so we use the previous label.
986            # e.g. the "04" in "12h04"
987            hms_idx = idx-1
988
989        elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
990              info.hms(tokens[idx-2]) is not None):
991            # If we are looking at the final token, we allow for a
992            # backward-looking check to skip over a space.
993            # TODO: Are we sure this is the right condition here?
994            hms_idx = idx - 2
995
996        else:
997            hms_idx = None
998
999        return hms_idx
1000
1001    def _assign_hms(self, res, value_repr, hms):
1002        # See GH issue #427, fixing float rounding
1003        value = self._to_decimal(value_repr)
1004
1005        if hms == 0:
1006            # Hour
1007            res.hour = int(value)
1008            if value % 1:
1009                res.minute = int(60*(value % 1))
1010
1011        elif hms == 1:
1012            (res.minute, res.second) = self._parse_min_sec(value)
1013
1014        elif hms == 2:
1015            (res.second, res.microsecond) = self._parsems(value_repr)
1016
1017    def _could_be_tzname(self, hour, tzname, tzoffset, token):
1018        return (hour is not None and
1019                tzname is None and
1020                tzoffset is None and
1021                len(token) <= 5 and
1022                all(x in string.ascii_uppercase for x in token))
1023
1024    def _ampm_valid(self, hour, ampm, fuzzy):
1025        """
1026        For fuzzy parsing, 'a' or 'am' (both valid English words)
1027        may erroneously trigger the AM/PM flag. Deal with that
1028        here.
1029        """
1030        val_is_ampm = True
1031
1032        # If there's already an AM/PM flag, this one isn't one.
1033        if fuzzy and ampm is not None:
1034            val_is_ampm = False
1035
1036        # If AM/PM is found and hour is not, raise a ValueError
1037        if hour is None:
1038            if fuzzy:
1039                val_is_ampm = False
1040            else:
1041                raise ValueError('No hour specified with AM or PM flag.')
1042        elif not 0 <= hour <= 12:
1043            # If AM/PM is found, it's a 12 hour clock, so raise
1044            # an error for invalid range
1045            if fuzzy:
1046                val_is_ampm = False
1047            else:
1048                raise ValueError('Invalid hour specified for 12-hour clock.')
1049
1050        return val_is_ampm
1051
1052    def _adjust_ampm(self, hour, ampm):
1053        if hour < 12 and ampm == 1:
1054            hour += 12
1055        elif hour == 12 and ampm == 0:
1056            hour = 0
1057        return hour
1058
1059    def _parse_min_sec(self, value):
1060        # TODO: Every usage of this function sets res.second to the return
1061        # value. Are there any cases where second will be returned as None and
1062        # we *dont* want to set res.second = None?
1063        minute = int(value)
1064        second = None
1065
1066        sec_remainder = value % 1
1067        if sec_remainder:
1068            second = int(60 * sec_remainder)
1069        return (minute, second)
1070
1071    def _parsems(self, value):
1072        """Parse a I[.F] seconds value into (seconds, microseconds)."""
1073        if "." not in value:
1074            return int(value), 0
1075        else:
1076            i, f = value.split(".")
1077            return int(i), int(f.ljust(6, "0")[:6])
1078
1079    def _parse_hms(self, idx, tokens, info, hms_idx):
1080        # TODO: Is this going to admit a lot of false-positives for when we
1081        # just happen to have digits and "h", "m" or "s" characters in non-date
1082        # text?  I guess hex hashes won't have that problem, but there's plenty
1083        # of random junk out there.
1084        if hms_idx is None:
1085            hms = None
1086            new_idx = idx
1087        elif hms_idx > idx:
1088            hms = info.hms(tokens[hms_idx])
1089            new_idx = hms_idx
1090        else:
1091            # Looking backwards, increment one.
1092            hms = info.hms(tokens[hms_idx]) + 1
1093            new_idx = idx
1094
1095        return (new_idx, hms)
1096
1097    def _recombine_skipped(self, tokens, skipped_idxs):
1098        """
1099        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
1100        >>> skipped_idxs = [0, 1, 2, 5]
1101        >>> _recombine_skipped(tokens, skipped_idxs)
1102        ["foo bar", "baz"]
1103        """
1104        skipped_tokens = []
1105        for i, idx in enumerate(sorted(skipped_idxs)):
1106            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
1107                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
1108            else:
1109                skipped_tokens.append(tokens[idx])
1110
1111        return skipped_tokens
1112
1113    def _build_tzinfo(self, tzinfos, tzname, tzoffset):
1114        if callable(tzinfos):
1115            tzdata = tzinfos(tzname, tzoffset)
1116        else:
1117            tzdata = tzinfos.get(tzname)
1118
1119        if isinstance(tzdata, datetime.tzinfo):
1120            tzinfo = tzdata
1121        elif isinstance(tzdata, text_type):
1122            tzinfo = tz.tzstr(tzdata)
1123        elif isinstance(tzdata, integer_types):
1124            tzinfo = tz.tzoffset(tzname, tzdata)
1125        else:
1126            raise ValueError("Offset must be tzinfo subclass, "
1127                             "tz string, or int offset.")
1128        return tzinfo
1129
1130    def _build_tzaware(self, naive, res, tzinfos):
1131        if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
1132            tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
1133            aware = naive.replace(tzinfo=tzinfo)
1134            aware = self._assign_tzname(aware, res.tzname)
1135
1136        elif res.tzname and res.tzname in time.tzname:
1137            aware = naive.replace(tzinfo=tz.tzlocal())
1138
1139            # Handle ambiguous local datetime
1140            aware = self._assign_tzname(aware, res.tzname)
1141
1142            # This is mostly relevant for winter GMT zones parsed in the UK
1143            if (aware.tzname() != res.tzname and
1144                    res.tzname in self.info.UTCZONE):
1145                aware = aware.replace(tzinfo=tz.tzutc())
1146
1147        elif res.tzoffset == 0:
1148            aware = naive.replace(tzinfo=tz.tzutc())
1149
1150        elif res.tzoffset:
1151            aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
1152
1153        elif not res.tzname and not res.tzoffset:
1154            # i.e. no timezone information was found.
1155            aware = naive
1156
1157        elif res.tzname:
1158            # tz-like string was parsed but we don't know what to do
1159            # with it
1160            warnings.warn("tzname {tzname} identified but not understood.  "
1161                          "Pass `tzinfos` argument in order to correctly "
1162                          "return a timezone-aware datetime.  In a future "
1163                          "version, this raise an "
1164                          "exception.".format(tzname=res.tzname),
1165                          category=UnknownTimezoneWarning)
1166            aware = naive
1167
1168        return aware
1169
1170    def _build_naive(self, res, default):
1171        repl = {}
1172        for attr in ("year", "month", "day", "hour",
1173                     "minute", "second", "microsecond"):
1174            value = getattr(res, attr)
1175            if value is not None:
1176                repl[attr] = value
1177
1178        if 'day' not in repl:
1179            # If the default day exceeds the last day of the month, fall back
1180            # to the end of the month.
1181            cyear = default.year if res.year is None else res.year
1182            cmonth = default.month if res.month is None else res.month
1183            cday = default.day if res.day is None else res.day
1184
1185            if cday > monthrange(cyear, cmonth)[1]:
1186                repl['day'] = monthrange(cyear, cmonth)[1]
1187
1188        naive = default.replace(**repl)
1189
1190        if res.weekday is not None and not res.day:
1191            naive = naive + relativedelta.relativedelta(weekday=res.weekday)
1192
1193        return naive
1194
1195    def _assign_tzname(self, dt, tzname):
1196        if dt.tzname() != tzname:
1197            new_dt = tz.enfold(dt, fold=1)
1198            if new_dt.tzname() == tzname:
1199                return new_dt
1200
1201        return dt
1202
1203    def _to_decimal(self, val):
1204        try:
1205            return Decimal(val)
1206        except Exception as e:
1207            msg = "Could not convert %s to decimal" % val
1208            six.raise_from(ValueError(msg), e)
1209
1210
# Shared parser instance built with default arguments; the module-level
# ``parse()`` function uses it when no ``parserinfo`` is supplied.
DEFAULTPARSER = parser()
1212
1213
def parse(timestr, parserinfo=None, **kwargs):
    """
    Parse a date/time string in one of the supported formats.

    The work is delegated to the shared default parser, or to a new parser
    built from ``parserinfo`` when one is supplied.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object. If this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a naive
        :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. It can be a dictionary
        mapping time zone names to time zones, or a function taking two
        parameters (``tzname`` and ``tzoffset``) and returning a time zone.

        The time zones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
        be the year, otherwise the last number is taken to be the year. If
        this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for strings like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` object and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A dedicated parser is only built when custom settings are supplied.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
1313
1314
class _tzparser(object):
    """
    Parser for TZ-variable-style time zone strings.

    ``parse`` recognises a standard abbreviation with an optional offset,
    optionally followed by a daylight-saving abbreviation and offset
    (``BRST+3[BRDT[+2]]``), and then optionally a comma-separated
    description of the transition rules in one of two notations:

    - the dateutil-specific numeric form, e.g.
      ``GMT0BST,3,0,30,3600,10,0,26,7200[,3600]`` (deprecated; a warning is
      issued when it is used);
    - the ``J<n>`` / ``M<month>.<week>.<weekday>`` / ``<n>`` forms, each
      optionally followed by ``/<time>``.
    """

    class _result(_resultbase):
        # Container for the parsed abbreviations, offsets and the start and
        # end transition rules.

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            # Description of a single transition rule.
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a ``_result``.

        :param tzstr:
            The time zone string to parse.

        :return:
            A ``_result`` whose ``any_unused_tokens`` attribute tells
            whether any token other than ``,`` or ``:`` was left
            unconsumed, or ``None`` if the string could not be parsed.
        """
        res = self._result()
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
        used_idxs = list()
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                # Collect the run of tokens containing no digit, sign, ':'
                # or ','; together they form the abbreviation.
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    # Marks everything up to j; re-marking earlier indices
                    # is harmless because used_idxs is only consumed as a
                    # set below.
                    for ii in range(j):
                        used_idxs.append(ii)
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break


            if i < len_l:
                # Accept ';' as an alternative to ',' in the rule section.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit check rather than ``assert`` so that validation
                # still happens when Python runs with -O.
                if l[i] != ',':
                    raise ValueError("Expected ',' after the offsets")

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s"' % tzstr) +
                     ' is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("Expected '-' or '.' after "
                                             "the month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("Expected '-' or '.' after "
                                             "the week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if not (i == len_l or l[i] == ','):
                        raise ValueError("Expected ',' or end of string "
                                         "after a transition rule")

                    i += 1

                if i < len_l:
                    raise ValueError("Unexpected trailing tokens")

        except (IndexError, ValueError, AssertionError):
            return None

        # Any leftover token other than ',' or ':' was not understood.
        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
        return res
1524
1525
# Shared ``_tzparser`` instance used by ``_parsetz()``.
DEFAULTTZPARSER = _tzparser()
1527
1528
def _parsetz(tzstr):
    """
    Parse the time zone string ``tzstr`` with the shared default
    ``_tzparser`` instance and return whatever its ``parse`` returns.
    """
    shared_parser = DEFAULTTZPARSER
    return shared_parser.parse(tzstr)
1531
class UnknownTimezoneWarning(RuntimeWarning):
    """
    Warning category used when the parser finds a time zone name that it
    cannot turn into a ``tzinfo``.
    """
1534# vim:ts=4:sw=4:et
1535