• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# module pyparsing.py
2#
3# Copyright (c) 2003-2016  Paul T. McGuire
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23#
24
25__doc__ = \
26"""
27pyparsing module - Classes and methods to define and execute parsing grammars
28
29The pyparsing module is an alternative approach to creating and executing simple grammars,
30vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
31don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32provides a library of classes that you use to construct the grammar directly in Python.
33
34Here is a program to parse "Hello, World!" (or any greeting of the form
35C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
36(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
37L{Literal} expressions)::
38
39    from pyparsing import Word, alphas
40
41    # define grammar of a greeting
42    greet = Word(alphas) + "," + Word(alphas) + "!"
43
44    hello = "Hello, World!"
45    print (hello, "->", greet.parseString(hello))
46
47The program outputs the following::
48
49    Hello, World! -> ['Hello', ',', 'World', '!']
50
51The Python representation of the grammar is quite readable, owing to the self-explanatory
52class names, and the use of '+', '|' and '^' operators.
53
54The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
55object with named attributes.
56
57The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
58 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
59 - quoted strings
60 - embedded comments
61"""
62
# Release identification for this copy of the module: version string,
# timestamp string for that version, and author contact.
__version__ = "2.1.10"
__versionTime__ = "07 Oct 2016 01:31 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
66
67import string
68from weakref import ref as wkref
69import copy
70import sys
71import warnings
72import re
73import sre_constants
74import collections
75import pprint
76import traceback
77import types
78from datetime import datetime
79
80try:
81    from _thread import RLock
82except ImportError:
83    from threading import RLock
84
85try:
86    from collections import OrderedDict as _OrderedDict
87except ImportError:
88    try:
89        from ordereddict import OrderedDict as _OrderedDict
90    except ImportError:
91        _OrderedDict = None
92
93#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
94
# Public API: the names bound by `from pyparsing import *`.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]
115
# Python 2 / Python 3 compatibility layer.  After this block the names
# _MAX_INT, _ustr and singleArgBuiltins exist on both major versions, and
# basestring / unichr are usable on Python 3 as well.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
           str(obj). If that fails with a UnicodeEncodeError, it encodes unicode(obj) with the
           default encoding, and rewrites the resulting XML character references as
           backslash-u escapes.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # raw string: '\d' is not a valid escape in a plain literal, and this
            # branch is still compiled (and warned about) by Python 3
            xmlcharref = Regex(r'&#\d+;')
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__,fname))
        except AttributeError:
            continue
159
# The type of generator objects, used to recognise generator arguments.
_generatorType = types.GeneratorType
161
def _xml_escape(data):
    """Return data with &, >, <, " and ' replaced by their XML entity references."""

    # ampersand must be replaced first, otherwise the '&' introduced by the
    # other entities would be escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw, entity in replacements:
        data = data.replace(raw, entity)
    return data
171
class _Constants(object):
    """Empty class whose instances serve as plain attribute containers."""
174
# Character-set constants used when building Word and similar expressions.
nums       = string.digits
alphas     = string.ascii_uppercase + string.ascii_lowercase
hexnums    = nums + "ABCDEFabcdef"
alphanums  = alphas + nums
_bslash    = "\\"
# every printable character that is not whitespace
printables = "".join([ch for ch in string.printable if not ch.isspace()])
181
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """
        Parameters:
         - pstr - the input string being parsed; when C{msg} is omitted this single
           string is taken as the message instead and the input string is recorded as ""
         - loc - character offset in C{pstr} at which the failure occurred
         - msg - description of the failure
         - elem - the parser element that raised the exception
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col, column - returns the column number of the exception text
            - line - returns the line containing the exception text

           Any other missing attribute raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        elif aname in ("col", "column"):
            return col( self.loc, self.pstr )
        elif aname == "line":
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join((line_str[:line_column],
                                markerString, line_str[line_column:]))
        return line_str.strip()

    def __dir__(self):
        # advertise every computed attribute that __getattr__ answers,
        # including the "column" alias of "col"
        return "lineno col column line".split() + dir(type(self))
237
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input.

    Attributes available by name:
     - lineno - the line number of the exception text
     - col - the column number of the exception text
     - line - the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
        Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
258
class ParseFatalException(ParseBaseException):
    """
    User-throwable exception, raised when inconsistent parse content is
    found; stops all parsing immediately.
    """
263
class ParseSyntaxException(ParseFatalException):
    """
    Same as L{ParseFatalException}, but raised internally when an
    L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing must stop
    immediately because an unbacktrackable syntax error has been found.
    """
269
270#~ class ReparseException(ParseBaseException):
271    #~ """Experimental class - parse actions can raise this exception to cause
272       #~ pyparsing to reparse the input string:
273        #~ - with a modified input string, and/or
274        #~ - with a modified start location
275       #~ Set the values of the ReparseException in the constructor, and raise the
276       #~ exception in a parse action to cause pyparsing to use the new string/location.
277       #~ Setting the values as None causes no change to be made.
278       #~ """
279    #~ def __init_( self, newstring, restartLoc ):
280        #~ self.newParseText = newstring
281        #~ self.reparseLoc = restartLoc
282
class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the chain of parse elements along which the recursion was detected
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple so that a tuple-valued trace is formatted
        # as a single value instead of being unpacked as the format arguments
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
290
class _ParseResultsWithOffset(object):
    """Pair of (value, offset) that supports indexing and lets the offset be replaced."""

    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        # only the value half of the pair is shown
        value = self.tup[0]
        return repr(value)

    def setOffset(self,i):
        value = self.tup[0]
        self.tup = (value,i)
300
301class ParseResults(object):
302    """
303    Structured parse results, to provide multiple means of access to the parsed data:
304       - as a list (C{len(results)})
305       - by list index (C{results[0], results[1]}, etc.)
306       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
307
308    Example::
309        integer = Word(nums)
310        date_str = (integer.setResultsName("year") + '/'
311                        + integer.setResultsName("month") + '/'
312                        + integer.setResultsName("day"))
313        # equivalent form:
314        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
315
316        # parseString returns a ParseResults object
317        result = date_str.parseString("1999/12/31")
318
319        def test(s, fn=repr):
320            print("%s -> %s" % (s, fn(eval(s))))
321        test("list(result)")
322        test("result[0]")
323        test("result['month']")
324        test("result.day")
325        test("'month' in result")
326        test("'minutes' in result")
327        test("result.dump()", str)
328    prints::
329        list(result) -> ['1999', '/', '12', '/', '31']
330        result[0] -> '1999'
331        result['month'] -> '12'
332        result.day -> '31'
333        'month' in result -> True
334        'minutes' in result -> False
335        result.dump() -> ['1999', '/', '12', '/', '31']
336        - day: 31
337        - month: 12
338        - year: 1999
339    """
340    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
341        if isinstance(toklist, cls):
342            return toklist
343        retobj = object.__new__(cls)
344        retobj.__doinit = True
345        return retobj
346
347    # Performance tuning: we construct a *lot* of these, so keep this
348    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        """
        Populate the instance and optionally register C{toklist} under C{name}.

        The one-time setup runs only while the C{__doinit} flag is still true, so an
        instance that has already been initialised keeps its existing tokens and names.

        Parameters:
         - toklist - a list (shallow-copied), a generator (materialised into a list), or
           any other object (wrapped in a one-element list); C{None} means no tokens
         - name - optional results name; an C{int} name is converted with C{_ustr}
         - asList - when true the named value is stored as a nested C{ParseResults};
           otherwise the first element of C{toklist} is stored, or C{toklist} itself
           if it cannot be indexed
         - modal - when false, C{name} is recorded in the accumulating-names table
         - isinstance - the builtin, bound as a default argument (presumably part of
           the performance tuning of this constructor - confirm before changing)
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                # names listed here make __getitem__ return every occurrence
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # nothing is stored under the name when toklist is None, '' or []
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    # label the nested results object that was just stored
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist
387
388    def __getitem__( self, i ):
389        if isinstance( i, (int,slice) ):
390            return self.__toklist[i]
391        else:
392            if i not in self.__accumNames:
393                return self.__tokdict[i][-1][0]
394            else:
395                return ParseResults([ v[0] for v in self.__tokdict[i] ])
396
397    def __setitem__( self, k, v, isinstance=isinstance ):
398        if isinstance(v,_ParseResultsWithOffset):
399            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
400            sub = v[0]
401        elif isinstance(k,(int,slice)):
402            self.__toklist[k] = v
403            sub = v
404        else:
405            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
406            sub = v
407        if isinstance(sub,ParseResults):
408            sub.__parent = wkref(self)
409
410    def __delitem__( self, i ):
411        if isinstance(i,(int,slice)):
412            mylen = len( self.__toklist )
413            del self.__toklist[i]
414
415            # convert int to slice
416            if isinstance(i, int):
417                if i < 0:
418                    i += mylen
419                i = slice(i, i+1)
420            # get removed indices
421            removed = list(range(*i.indices(mylen)))
422            removed.reverse()
423            # fixup indices in token dictionary
424            for name,occurrences in self.__tokdict.items():
425                for j in removed:
426                    for k, (value, position) in enumerate(occurrences):
427                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
428        else:
429            del self.__tokdict[i]
430
431    def __contains__( self, k ):
432        return k in self.__tokdict
433
434    def __len__( self ): return len( self.__toklist )
435    def __bool__(self): return ( not not self.__toklist )
436    __nonzero__ = __bool__
437    def __iter__( self ): return iter( self.__toklist )
438    def __reversed__( self ): return iter( self.__toklist[::-1] )
439    def _iterkeys( self ):
440        if hasattr(self.__tokdict, "iterkeys"):
441            return self.__tokdict.iterkeys()
442        else:
443            return iter(self.__tokdict)
444
445    def _itervalues( self ):
446        return (self[k] for k in self._iterkeys())
447
448    def _iteritems( self ):
449        return ((k, self[k]) for k in self._iterkeys())
450
    # Expose the private iterator helpers under the dict-style names of the running
    # Python version: iterator-returning keys/values/items on Python 3;
    # iterkeys/itervalues/iteritems plus list-returning keys/values/items on Python 2.
    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys (Python 3.x only)."""

        values = _itervalues
        """Returns an iterator of all named result values (Python 3.x only)."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())
482
483    def haskeys( self ):
484        """Since keys() returns an iterator, this method is helpful in bypassing
485           code that looks for the existence of any defined results names."""
486        return bool(self.__tokdict)
487
488    def pop( self, *args, **kwargs):
489        """
490        Removes and returns item at specified index (default=C{last}).
491        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
492        argument or an integer argument, it will use C{list} semantics
493        and pop tokens from the list of parsed tokens. If passed a
494        non-integer argument (most likely a string), it will use C{dict}
495        semantics and pop the corresponding value from any defined
496        results names. A second default return value argument is
497        supported, just as in C{dict.pop()}.
498
499        Example::
500            def remove_first(tokens):
501                tokens.pop(0)
502            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
503            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
504
505            label = Word(alphas)
506            patt = label("LABEL") + OneOrMore(Word(nums))
507            print(patt.parseString("AAB 123 321").dump())
508
509            # Use pop() in a parse action to remove named result (note that corresponding value is not
510            # removed from list form of results)
511            def remove_LABEL(tokens):
512                tokens.pop("LABEL")
513                return tokens
514            patt.addParseAction(remove_LABEL)
515            print(patt.parseString("AAB 123 321").dump())
516        prints::
517            ['AAB', '123', '321']
518            - LABEL: AAB
519
520            ['AAB', '123', '321']
521        """
522        if not args:
523            args = [-1]
524        for k,v in kwargs.items():
525            if k == 'default':
526                args = (args[0], v)
527            else:
528                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
529        if (isinstance(args[0], int) or
530                        len(args) == 1 or
531                        args[0] in self):
532            index = args[0]
533            ret = self[index]
534            del self[index]
535            return ret
536        else:
537            defaultvalue = args[1]
538            return defaultvalue
539
540    def get(self, key, defaultValue=None):
541        """
542        Returns named result matching the given key, or if there is no
543        such name, then returns the given C{defaultValue} or C{None} if no
544        C{defaultValue} is specified.
545
546        Similar to C{dict.get()}.
547
548        Example::
549            integer = Word(nums)
550            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
551
552            result = date_str.parseString("1999/12/31")
553            print(result.get("year")) # -> '1999'
554            print(result.get("hour", "not specified")) # -> 'not specified'
555            print(result.get("hour")) # -> None
556        """
557        if key in self:
558            return self[key]
559        else:
560            return defaultValue
561
562    def insert( self, index, insStr ):
563        """
564        Inserts new element at location index in the list of parsed tokens.
565
566        Similar to C{list.insert()}.
567
568        Example::
569            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
570
571            # use a parse action to insert the parse location in the front of the parsed results
572            def insert_locn(locn, tokens):
573                tokens.insert(0, locn)
574            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
575        """
576        self.__toklist.insert(index, insStr)
577        # fixup indices in token dictionary
578        for name,occurrences in self.__tokdict.items():
579            for k, (value, position) in enumerate(occurrences):
580                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
581
582    def append( self, item ):
583        """
584        Add single element to end of ParseResults list of elements.
585
586        Example::
587            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
588
589            # use a parse action to compute the sum of the parsed integers, and add it to the end
590            def append_sum(tokens):
591                tokens.append(sum(map(int, tokens)))
592            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
593        """
594        self.__toklist.append(item)
595
596    def extend( self, itemseq ):
597        """
598        Add sequence of elements to end of ParseResults list of elements.
599
600        Example::
601            patt = OneOrMore(Word(alphas))
602
603            # use a parse action to append the reverse of the matched strings, to make a palindrome
604            def make_palindrome(tokens):
605                tokens.extend(reversed([t[::-1] for t in tokens]))
606                return ''.join(tokens)
607            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
608        """
609        if isinstance(itemseq, ParseResults):
610            self += itemseq
611        else:
612            self.__toklist.extend(itemseq)
613
614    def clear( self ):
615        """
616        Clear all elements and results names.
617        """
618        del self.__toklist[:]
619        self.__tokdict.clear()
620
621    def __getattr__( self, name ):
622        try:
623            return self[name]
624        except KeyError:
625            return ""
626
627        if name in self.__tokdict:
628            if name not in self.__accumNames:
629                return self.__tokdict[name][-1][0]
630            else:
631                return ParseResults([ v[0] for v in self.__tokdict[name] ])
632        else:
633            return ""
634
635    def __add__( self, other ):
636        ret = self.copy()
637        ret += other
638        return ret
639
640    def __iadd__( self, other ):
641        if other.__tokdict:
642            offset = len(self.__toklist)
643            addoffset = lambda a: offset if a<0 else a+offset
644            otheritems = other.__tokdict.items()
645            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
646                                for (k,vlist) in otheritems for v in vlist]
647            for k,v in otherdictitems:
648                self[k] = v
649                if isinstance(v[0],ParseResults):
650                    v[0].__parent = wkref(self)
651
652        self.__toklist += other.__toklist
653        self.__accumNames.update( other.__accumNames )
654        return self
655
656    def __radd__(self, other):
657        if isinstance(other,int) and other == 0:
658            # useful for merging many ParseResults using sum() builtin
659            return self.copy()
660        else:
661            # this may raise a TypeError - so be it
662            return other + self
663
664    def __repr__( self ):
665        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
666
667    def __str__( self ):
668        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
669
670    def _asStringList( self, sep='' ):
671        out = []
672        for item in self.__toklist:
673            if out and sep:
674                out.append(sep)
675            if isinstance( item, ParseResults ):
676                out += item._asStringList()
677            else:
678                out.append( _ustr(item) )
679        return out
680
681    def asList( self ):
682        """
683        Returns the parse results as a nested list of matching tokens, all converted to strings.
684
685        Example::
686            patt = OneOrMore(Word(alphas))
687            result = patt.parseString("sldkj lsdkj sldkj")
688            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
689            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
690
691            # Use asList() to create an actual list
692            result_list = result.asList()
693            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
694        """
695        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
696
697    def asDict( self ):
698        """
699        Returns the named parse results as a nested dictionary.
700
701        Example::
702            integer = Word(nums)
703            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
704
705            result = date_str.parseString('12/31/1999')
706            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
707
708            result_dict = result.asDict()
709            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
710
711            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
712            import json
713            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
714            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
715        """
716        if PY_3:
717            item_fn = self.items
718        else:
719            item_fn = self.iteritems
720
721        def toItem(obj):
722            if isinstance(obj, ParseResults):
723                if obj.haskeys():
724                    return obj.asDict()
725                else:
726                    return [toItem(v) for v in obj]
727            else:
728                return obj
729
730        return dict((k,toItem(v)) for k,v in item_fn())
731
732    def copy( self ):
733        """
734        Returns a new copy of a C{ParseResults} object.
735        """
736        ret = ParseResults( self.__toklist )
737        ret.__tokdict = self.__tokdict.copy()
738        ret.__parent = self.__parent
739        ret.__accumNames.update( self.__accumNames )
740        ret.__name = self.__name
741        return ret
742
743    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
744        """
745        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
746        """
747        nl = "\n"
748        out = []
749        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
750                                                            for v in vlist)
751        nextLevelIndent = indent + "  "
752
753        # collapse out indents if formatting is not desired
754        if not formatted:
755            indent = ""
756            nextLevelIndent = ""
757            nl = ""
758
759        selfTag = None
760        if doctag is not None:
761            selfTag = doctag
762        else:
763            if self.__name:
764                selfTag = self.__name
765
766        if not selfTag:
767            if namedItemsOnly:
768                return ""
769            else:
770                selfTag = "ITEM"
771
772        out += [ nl, indent, "<", selfTag, ">" ]
773
774        for i,res in enumerate(self.__toklist):
775            if isinstance(res,ParseResults):
776                if i in namedItems:
777                    out += [ res.asXML(namedItems[i],
778                                        namedItemsOnly and doctag is None,
779                                        nextLevelIndent,
780                                        formatted)]
781                else:
782                    out += [ res.asXML(None,
783                                        namedItemsOnly and doctag is None,
784                                        nextLevelIndent,
785                                        formatted)]
786            else:
787                # individual token, see if there is a name for it
788                resTag = None
789                if i in namedItems:
790                    resTag = namedItems[i]
791                if not resTag:
792                    if namedItemsOnly:
793                        continue
794                    else:
795                        resTag = "ITEM"
796                xmlBodyText = _xml_escape(_ustr(res))
797                out += [ nl, nextLevelIndent, "<", resTag, ">",
798                                                xmlBodyText,
799                                                "</", resTag, ">" ]
800
801        out += [ nl, indent, "</", selfTag, ">" ]
802        return "".join(out)
803
804    def __lookup(self,sub):
805        for k,vlist in self.__tokdict.items():
806            for v,loc in vlist:
807                if sub is v:
808                    return k
809        return None
810
811    def getName(self):
812        """
813        Returns the results name for this token expression. Useful when several
814        different expressions might match at a particular location.
815
816        Example::
817            integer = Word(nums)
818            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
819            house_number_expr = Suppress('#') + Word(nums, alphanums)
820            user_data = (Group(house_number_expr)("house_number")
821                        | Group(ssn_expr)("ssn")
822                        | Group(integer)("age"))
823            user_info = OneOrMore(user_data)
824
825            result = user_info.parseString("22 111-22-3333 #221B")
826            for item in result:
827                print(item.getName(), ':', item[0])
828        prints::
829            age : 22
830            ssn : 111-22-3333
831            house_number : 221B
832        """
833        if self.__name:
834            return self.__name
835        elif self.__parent:
836            par = self.__parent()
837            if par:
838                return par.__lookup(self)
839            else:
840                return None
841        elif (len(self) == 1 and
842               len(self.__tokdict) == 1 and
843               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
844            return next(iter(self.__tokdict.keys()))
845        else:
846            return None
847
848    def dump(self, indent='', depth=0, full=True):
849        """
850        Diagnostic method for listing out the contents of a C{ParseResults}.
851        Accepts an optional C{indent} argument so that this string can be embedded
852        in a nested display of other data.
853
854        Example::
855            integer = Word(nums)
856            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
857
858            result = date_str.parseString('12/31/1999')
859            print(result.dump())
860        prints::
861            ['12', '/', '31', '/', '1999']
862            - day: 1999
863            - month: 31
864            - year: 12
865        """
866        out = []
867        NL = '\n'
868        out.append( indent+_ustr(self.asList()) )
869        if full:
870            if self.haskeys():
871                items = sorted((str(k), v) for k,v in self.items())
872                for k,v in items:
873                    if out:
874                        out.append(NL)
875                    out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
876                    if isinstance(v,ParseResults):
877                        if v:
878                            out.append( v.dump(indent,depth+1) )
879                        else:
880                            out.append(_ustr(v))
881                    else:
882                        out.append(repr(v))
883            elif any(isinstance(vv,ParseResults) for vv in self):
884                v = self
885                for i,vv in enumerate(v):
886                    if isinstance(vv,ParseResults):
887                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) ))
888                    else:
889                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv)))
890
891        return "".join(out)
892
893    def pprint(self, *args, **kwargs):
894        """
895        Pretty-printer for parsed results as a list, using the C{pprint} module.
896        Accepts additional positional or keyword args as defined for the
897        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
898
899        Example::
900            ident = Word(alphas, alphanums)
901            num = Word(nums)
902            func = Forward()
903            term = ident | num | Group('(' + func + ')')
904            func <<= ident + Group(Optional(delimitedList(term)))
905            result = func.parseString("fna a,b,(fnb c,d,200),100")
906            result.pprint(width=40)
907        prints::
908            ['fna',
909             ['a',
910              'b',
911              ['(', 'fnb', ['c', 'd', '200'], ')'],
912              '100']]
913        """
914        pprint.pprint(self.asList(), *args, **kwargs)
915
916    # add support for pickle protocol
917    def __getstate__(self):
918        return ( self.__toklist,
919                 ( self.__tokdict.copy(),
920                   self.__parent is not None and self.__parent() or None,
921                   self.__accumNames,
922                   self.__name ) )
923
924    def __setstate__(self,state):
925        self.__toklist = state[0]
926        (self.__tokdict,
927         par,
928         inAccumNames,
929         self.__name) = state[1]
930        self.__accumNames = {}
931        self.__accumNames.update(inAccumNames)
932        if par is not None:
933            self.__parent = wkref(par)
934        else:
935            self.__parent = None
936
937    def __getnewargs__(self):
938        return self.__toklist, self.__name, self.__asList, self.__modal
939
940    def __dir__(self):
941        return (dir(type(self)) + list(self.keys()))
942
# Register ParseResults as a virtual MutableMapping subclass.  The ABCs live in
# collections.abc on Python 3 (the collections.MutableMapping alias was removed
# in Python 3.10); fall back to the collections module itself on Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
944
def col (loc,strg):
    """Returns the column of location C{loc} within the string C{strg}, counting
    newlines as line separators. The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # a location directly after a newline is the first column of the next line
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    # otherwise measure the distance back to the preceding newline (rfind gives -1 if none)
    return loc - strg.rfind("\n", 0, loc)
957
def lineno(loc,strg):
    """Returns the line number of location C{loc} within the string C{strg},
    counting newlines as line separators. The first line is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    newlinesBefore = strg.count("\n", 0, loc)
    return newlinesBefore + 1
969
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    The returned text does not include the newline characters that delimit it.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc - the line runs to the end of the string
        end = None
    return strg[start:end]
979
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the expression, the location, and its (line,column)."""
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    message += "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print (message)
982
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the expression and the matched tokens as a list."""
    label = _ustr(expr)
    matched = str(toks.asList())
    print ("Matched " + label + " -> " + matched)
985
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: prints the exception."""
    description = _ustr(exc)
    print ("Exception raised:" + description)
988
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them, and returns C{None}.
    """
992
993# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
994#~ 'decorator to trim function calls to match the arity of the target'
995#~ def _trim_arity(func, maxargs=3):
996    #~ if func in singleArgBuiltins:
997        #~ return lambda s,l,t: func(t)
998    #~ limit = 0
999    #~ foundArity = False
1000    #~ def wrapper(*args):
1001        #~ nonlocal limit,foundArity
1002        #~ while 1:
1003            #~ try:
1004                #~ ret = func(*args[limit:])
1005                #~ foundArity = True
1006                #~ return ret
1007            #~ except TypeError:
1008                #~ if limit == maxargs or foundArity:
1009                    #~ raise
1010                #~ limit += 1
1011                #~ continue
1012    #~ return wrapper
1013
1014# this version is Python 2.x-3.x cross-compatible
1015'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap C{func} so that it can be called with the full parse action argument
    list even if it accepts fewer arguments.

    The returned wrapper calls C{func} with all the arguments it was given; if
    that call itself raises C{TypeError}, it retries with one more leading
    argument dropped, up to C{maxargs+1} dropped arguments.  Once a call has
    succeeded, the number of dropped arguments is kept for later calls and any
    further C{TypeError} is re-raised unchanged.  A C{TypeError} raised from
    inside C{func} (rather than by the call itself) is also re-raised.

    Functions listed in C{singleArgBuiltins} are wrapped so that they receive
    only the third argument.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists so that the nested wrapper can update these values
    limit = [0]             # number of leading arguments currently dropped
    foundArity = [False]    # set once a call to func has succeeded

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # the deepest of the first two traceback entries is the call line
                        # above only when the call itself failed; anything else means the
                        # TypeError came from inside func
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        del tb

                # retry with one more leading argument removed, until the limit is exhausted
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1079
1080class ParserElement(object):
1081    """Abstract base level parser element class."""
1082    DEFAULT_WHITE_CHARS = " \n\t\r"
1083    verbose_stacktrace = False
1084
1085    @staticmethod
1086    def setDefaultWhitespaceChars( chars ):
1087        r"""
1088        Overrides the default whitespace chars
1089
1090        Example::
1091            # default whitespace chars are space, <TAB> and newline
1092            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
1093
1094            # change to just treat newline as significant
1095            ParserElement.setDefaultWhitespaceChars(" \t")
1096            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
1097        """
1098        ParserElement.DEFAULT_WHITE_CHARS = chars
1099
1100    @staticmethod
1101    def inlineLiteralsUsing(cls):
1102        """
1103        Set class to be used for inclusion of string literals into a parser.
1104
1105        Example::
1106            # default literal class used is Literal
1107            integer = Word(nums)
1108            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1109
1110            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1111
1112
1113            # change to Suppress
1114            ParserElement.inlineLiteralsUsing(Suppress)
1115            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1116
1117            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
1118        """
1119        ParserElement._literalStringClass = cls
1120
1121    def __init__( self, savelist=False ):
1122        self.parseAction = list()
1123        self.failAction = None
1124        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
1125        self.strRepr = None
1126        self.resultsName = None
1127        self.saveAsList = savelist
1128        self.skipWhitespace = True
1129        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1130        self.copyDefaultWhiteChars = True
1131        self.mayReturnEmpty = False # used when checking for left-recursion
1132        self.keepTabs = False
1133        self.ignoreExprs = list()
1134        self.debug = False
1135        self.streamlined = False
1136        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
1137        self.errmsg = ""
1138        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
1139        self.debugActions = ( None, None, None ) #custom debug actions
1140        self.re = None
1141        self.callPreparse = True # used to avoid redundant calls to preParse
1142        self.callDuringTry = False
1143
1144    def copy( self ):
1145        """
1146        Make a copy of this C{ParserElement}.  Useful for defining different parse actions
1147        for the same parsing pattern, using copies of the original parse element.
1148
1149        Example::
1150            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1151            integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
1152            integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1153
1154            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1155        prints::
1156            [5120, 100, 655360, 268435456]
1157        Equivalent form of C{expr.copy()} is just C{expr()}::
1158            integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1159        """
1160        cpy = copy.copy( self )
1161        cpy.parseAction = self.parseAction[:]
1162        cpy.ignoreExprs = self.ignoreExprs[:]
1163        if self.copyDefaultWhiteChars:
1164            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1165        return cpy
1166
1167    def setName( self, name ):
1168        """
1169        Define name for this expression, makes debugging and exception messages clearer.
1170
1171        Example::
1172            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1173            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
1174        """
1175        self.name = name
1176        self.errmsg = "Expected " + self.name
1177        if hasattr(self,"exception"):
1178            self.exception.msg = self.errmsg
1179        return self
1180
1181    def setResultsName( self, name, listAllMatches=False ):
1182        """
1183        Define name for referencing matching tokens as a nested attribute
1184        of the returned parse results.
1185        NOTE: this returns a *copy* of the original C{ParserElement} object;
1186        this is so that the client can define a basic element, such as an
1187        integer, and reference it in multiple places with different names.
1188
1189        You can also set results names using the abbreviated syntax,
1190        C{expr("name")} in place of C{expr.setResultsName("name")} -
1191        see L{I{__call__}<__call__>}.
1192
1193        Example::
1194            date_str = (integer.setResultsName("year") + '/'
1195                        + integer.setResultsName("month") + '/'
1196                        + integer.setResultsName("day"))
1197
1198            # equivalent form:
1199            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1200        """
1201        newself = self.copy()
1202        if name.endswith("*"):
1203            name = name[:-1]
1204            listAllMatches=True
1205        newself.resultsName = name
1206        newself.modalResults = not listAllMatches
1207        return newself
1208
1209    def setBreak(self,breakFlag = True):
1210        """Method to invoke the Python pdb debugger when this element is
1211           about to be parsed. Set C{breakFlag} to True to enable, False to
1212           disable.
1213        """
1214        if breakFlag:
1215            _parseMethod = self._parse
1216            def breaker(instring, loc, doActions=True, callPreParse=True):
1217                import pdb
1218                pdb.set_trace()
1219                return _parseMethod( instring, loc, doActions, callPreParse )
1220            breaker._originalParseMethod = _parseMethod
1221            self._parse = breaker
1222        else:
1223            if hasattr(self._parse,"_originalParseMethod"):
1224                self._parse = self._parse._originalParseMethod
1225        return self
1226
1227    def setParseAction( self, *fns, **kwargs ):
1228        """
1229        Define action to perform when successfully matching parse element definition.
1230        Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
1231        C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
1232         - s   = the original string being parsed (see note below)
1233         - loc = the location of the matching substring
1234         - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
1235        If the functions in fns modify the tokens, they can return them as the return
1236        value from fn, and the modified list of tokens will replace the original.
1237        Otherwise, fn does not need to return any value.
1238
1239        Optional keyword arguments:
1240         - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
1241
1242        Note: the default parsing behavior is to expand tabs in the input string
1243        before starting the parsing process.  See L{I{parseString}<parseString>} for more information
1244        on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
1245        consistent view of the parsed string, the parse location, and line and column
1246        positions within the parsed string.
1247
1248        Example::
1249            integer = Word(nums)
1250            date_str = integer + '/' + integer + '/' + integer
1251
1252            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1253
1254            # use parse action to convert to ints at parse time
1255            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1256            date_str = integer + '/' + integer + '/' + integer
1257
1258            # note that integer fields are now ints, not strings
1259            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
1260        """
1261        self.parseAction = list(map(_trim_arity, list(fns)))
1262        self.callDuringTry = kwargs.get("callDuringTry", False)
1263        return self
1264
1265    def addParseAction( self, *fns, **kwargs ):
1266        """
1267        Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
1268
1269        See examples in L{I{copy}<copy>}.
1270        """
1271        self.parseAction += list(map(_trim_arity, list(fns)))
1272        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1273        return self
1274
1275    def addCondition(self, *fns, **kwargs):
1276        """Add a boolean predicate function to expression's list of parse actions. See
1277        L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
1278        functions passed to C{addCondition} need to return boolean success/fail of the condition.
1279
1280        Optional keyword arguments:
1281         - message = define a custom message to be used in the raised exception
1282         - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
1283
1284        Example::
1285            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1286            year_int = integer.copy()
1287            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
1288            date_str = year_int + '/' + integer + '/' + integer
1289
1290            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
1291        """
1292        msg = kwargs.get("message", "failed user-defined condition")
1293        exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
1294        for fn in fns:
1295            def pa(s,l,t):
1296                if not bool(_trim_arity(fn)(s,l,t)):
1297                    raise exc_type(s,l,msg)
1298            self.parseAction.append(pa)
1299        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1300        return self
1301
1302    def setFailAction( self, fn ):
1303        """Define action to perform if parsing fails at this expression.
1304           Fail acton fn is a callable function that takes the arguments
1305           C{fn(s,loc,expr,err)} where:
1306            - s = string being parsed
1307            - loc = location where expression match was attempted and failed
1308            - expr = the parse expression that failed
1309            - err = the exception thrown
1310           The function returns no value.  It may throw C{L{ParseFatalException}}
1311           if it is desired to stop parsing immediately."""
1312        self.failAction = fn
1313        return self
1314
1315    def _skipIgnorables( self, instring, loc ):
1316        exprsFound = True
1317        while exprsFound:
1318            exprsFound = False
1319            for e in self.ignoreExprs:
1320                try:
1321                    while 1:
1322                        loc,dummy = e._parse( instring, loc )
1323                        exprsFound = True
1324                except ParseException:
1325                    pass
1326        return loc
1327
1328    def preParse( self, instring, loc ):
1329        if self.ignoreExprs:
1330            loc = self._skipIgnorables( instring, loc )
1331
1332        if self.skipWhitespace:
1333            wt = self.whiteChars
1334            instrlen = len(instring)
1335            while loc < instrlen and instring[loc] in wt:
1336                loc += 1
1337
1338        return loc
1339
1340    def parseImpl( self, instring, loc, doActions=True ):
1341        return loc, []
1342
1343    def postParse( self, instring, loc, tokenlist ):
1344        return tokenlist
1345
1346    #~ @profile
1347    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
1348        debugging = ( self.debug ) #and doActions )
1349
1350        if debugging or self.failAction:
1351            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
1352            if (self.debugActions[0] ):
1353                self.debugActions[0]( instring, loc, self )
1354            if callPreParse and self.callPreparse:
1355                preloc = self.preParse( instring, loc )
1356            else:
1357                preloc = loc
1358            tokensStart = preloc
1359            try:
1360                try:
1361                    loc,tokens = self.parseImpl( instring, preloc, doActions )
1362                except IndexError:
1363                    raise ParseException( instring, len(instring), self.errmsg, self )
1364            except ParseBaseException as err:
1365                #~ print ("Exception raised:", err)
1366                if self.debugActions[2]:
1367                    self.debugActions[2]( instring, tokensStart, self, err )
1368                if self.failAction:
1369                    self.failAction( instring, tokensStart, self, err )
1370                raise
1371        else:
1372            if callPreParse and self.callPreparse:
1373                preloc = self.preParse( instring, loc )
1374            else:
1375                preloc = loc
1376            tokensStart = preloc
1377            if self.mayIndexError or loc >= len(instring):
1378                try:
1379                    loc,tokens = self.parseImpl( instring, preloc, doActions )
1380                except IndexError:
1381                    raise ParseException( instring, len(instring), self.errmsg, self )
1382            else:
1383                loc,tokens = self.parseImpl( instring, preloc, doActions )
1384
1385        tokens = self.postParse( instring, loc, tokens )
1386
1387        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
1388        if self.parseAction and (doActions or self.callDuringTry):
1389            if debugging:
1390                try:
1391                    for fn in self.parseAction:
1392                        tokens = fn( instring, tokensStart, retTokens )
1393                        if tokens is not None:
1394                            retTokens = ParseResults( tokens,
1395                                                      self.resultsName,
1396                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
1397                                                      modal=self.modalResults )
1398                except ParseBaseException as err:
1399                    #~ print "Exception raised in user parse action:", err
1400                    if (self.debugActions[2] ):
1401                        self.debugActions[2]( instring, tokensStart, self, err )
1402                    raise
1403            else:
1404                for fn in self.parseAction:
1405                    tokens = fn( instring, tokensStart, retTokens )
1406                    if tokens is not None:
1407                        retTokens = ParseResults( tokens,
1408                                                  self.resultsName,
1409                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
1410                                                  modal=self.modalResults )
1411
1412        if debugging:
1413            #~ print ("Matched",self,"->",retTokens.asList())
1414            if (self.debugActions[1] ):
1415                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
1416
1417        return loc, retTokens
1418
1419    def tryParse( self, instring, loc ):
1420        try:
1421            return self._parse( instring, loc, doActions=False )[0]
1422        except ParseFatalException:
1423            raise ParseException( instring, loc, self.errmsg, self)
1424
1425    def canParseNext(self, instring, loc):
1426        try:
1427            self.tryParse(instring, loc)
1428        except (ParseException, IndexError):
1429            return False
1430        else:
1431            return True
1432
1433    class _UnboundedCache(object):
1434        def __init__(self):
1435            cache = {}
1436            self.not_in_cache = not_in_cache = object()
1437
1438            def get(self, key):
1439                return cache.get(key, not_in_cache)
1440
1441            def set(self, key, value):
1442                cache[key] = value
1443
1444            def clear(self):
1445                cache.clear()
1446
1447            self.get = types.MethodType(get, self)
1448            self.set = types.MethodType(set, self)
1449            self.clear = types.MethodType(clear, self)
1450
1451    if _OrderedDict is not None:
1452        class _FifoCache(object):
1453            def __init__(self, size):
1454                self.not_in_cache = not_in_cache = object()
1455
1456                cache = _OrderedDict()
1457
1458                def get(self, key):
1459                    return cache.get(key, not_in_cache)
1460
1461                def set(self, key, value):
1462                    cache[key] = value
1463                    if len(cache) > size:
1464                        cache.popitem(False)
1465
1466                def clear(self):
1467                    cache.clear()
1468
1469                self.get = types.MethodType(get, self)
1470                self.set = types.MethodType(set, self)
1471                self.clear = types.MethodType(clear, self)
1472
1473    else:
1474        class _FifoCache(object):
1475            def __init__(self, size):
1476                self.not_in_cache = not_in_cache = object()
1477
1478                cache = {}
1479                key_fifo = collections.deque([], size)
1480
1481                def get(self, key):
1482                    return cache.get(key, not_in_cache)
1483
1484                def set(self, key, value):
1485                    cache[key] = value
1486                    if len(cache) > size:
1487                        cache.pop(key_fifo.popleft(), None)
1488                    key_fifo.append(key)
1489
1490                def clear(self):
1491                    cache.clear()
1492                    key_fifo.clear()
1493
1494                self.get = types.MethodType(get, self)
1495                self.set = types.MethodType(set, self)
1496                self.clear = types.MethodType(clear, self)
1497
    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
    packrat_cache_lock = RLock()  # held by _parseCache for its whole lookup/parse/store sequence
    packrat_cache_stats = [0, 0]  # [hits, misses], indexed by HIT and MISS in _parseCache
1502
1503    # this method gets repeatedly called during backtracking with the same arguments -
1504    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
1505    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
1506        HIT, MISS = 0, 1
1507        lookup = (self, instring, loc, callPreParse, doActions)
1508        with ParserElement.packrat_cache_lock:
1509            cache = ParserElement.packrat_cache
1510            value = cache.get(lookup)
1511            if value is cache.not_in_cache:
1512                ParserElement.packrat_cache_stats[MISS] += 1
1513                try:
1514                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
1515                except ParseBaseException as pe:
1516                    # cache a copy of the exception, without the traceback
1517                    cache.set(lookup, pe.__class__(*pe.args))
1518                    raise
1519                else:
1520                    cache.set(lookup, (value[0], value[1].copy()))
1521                    return value
1522            else:
1523                ParserElement.packrat_cache_stats[HIT] += 1
1524                if isinstance(value, Exception):
1525                    raise value
1526                return (value[0], value[1].copy())
1527
    # parsing is uncached by default; enablePackrat() rebinds _parse to _parseCache
    _parse = _parseNoCache
1529
1530    @staticmethod
1531    def resetCache():
1532        ParserElement.packrat_cache.clear()
1533        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
1534
    # set to True by the first enablePackrat() call; while True, further calls do nothing
    _packratEnabled = False
1536    @staticmethod
1537    def enablePackrat(cache_size_limit=128):
1538        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1539           Repeated parse attempts at the same string location (which happens
1540           often in many complex grammars) can immediately return a cached value,
1541           instead of re-executing parsing/validating code.  Memoizing is done of
1542           both valid results and parsing exceptions.
1543
1544           Parameters:
1545            - cache_size_limit - (default=C{128}) - if an integer value is provided
1546              will limit the size of the packrat cache; if None is passed, then
1547              the cache size will be unbounded; if 0 is passed, the cache will
1548              be effectively disabled.
1549
1550           This speedup may break existing programs that use parse actions that
1551           have side-effects.  For this reason, packrat parsing is disabled when
1552           you first import pyparsing.  To activate the packrat feature, your
1553           program must call the class method C{ParserElement.enablePackrat()}.  If
1554           your program uses C{psyco} to "compile as you go", you must call
1555           C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
1556           Python will crash.  For best results, call C{enablePackrat()} immediately
1557           after importing pyparsing.
1558
1559           Example::
1560               import pyparsing
1561               pyparsing.ParserElement.enablePackrat()
1562        """
1563        if not ParserElement._packratEnabled:
1564            ParserElement._packratEnabled = True
1565            if cache_size_limit is None:
1566                ParserElement.packrat_cache = ParserElement._UnboundedCache()
1567            else:
1568                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1569            ParserElement._parse = ParserElement._parseCache
1570
1571    def parseString( self, instring, parseAll=False ):
1572        """
1573        Execute the parse expression with the given string.
1574        This is the main interface to the client code, once the complete
1575        expression has been built.
1576
1577        If you want the grammar to require that the entire input string be
1578        successfully parsed, then set C{parseAll} to True (equivalent to ending
1579        the grammar with C{L{StringEnd()}}).
1580
1581        Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1582        in order to report proper column numbers in parse actions.
1583        If the input string contains tabs and
1584        the grammar uses parse actions that use the C{loc} argument to index into the
1585        string being parsed, you can ensure you have a consistent view of the input
1586        string by:
1587         - calling C{parseWithTabs} on your grammar before calling C{parseString}
1588           (see L{I{parseWithTabs}<parseWithTabs>})
1589         - define your parse action using the full C{(s,loc,toks)} signature, and
1590           reference the input string using the parse action's C{s} argument
1591         - explictly expand the tabs in your input string before calling
1592           C{parseString}
1593
1594        Example::
1595            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
1596            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
1597        """
1598        ParserElement.resetCache()
1599        if not self.streamlined:
1600            self.streamline()
1601            #~ self.saveAsList = True
1602        for e in self.ignoreExprs:
1603            e.streamline()
1604        if not self.keepTabs:
1605            instring = instring.expandtabs()
1606        try:
1607            loc, tokens = self._parse( instring, 0 )
1608            if parseAll:
1609                loc = self.preParse( instring, loc )
1610                se = Empty() + StringEnd()
1611                se._parse( instring, loc )
1612        except ParseBaseException as exc:
1613            if ParserElement.verbose_stacktrace:
1614                raise
1615            else:
1616                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1617                raise exc
1618        else:
1619            return tokens
1620
    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
        """
        Scan the input string for expression matches.  This is a generator: each match
        yields a C{(tokens, start, end)} tuple with the matching tokens, start location,
        and end location.  May be called with optional
        C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
        C{overlap} is specified, then overlapping matches will be reported.

        Note that the start and end locations are reported relative to the string
        being parsed.  See L{I{parseString}<parseString>} for more information on parsing
        strings with embedded tabs.

        Example::
            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
            print(source)
            for tokens,start,end in Word(alphas).scanString(source):
                print(' '*start + '^'*(end-start))
                print(' '*start + tokens[0])

        prints::

            sldjf123lsdjjkf345sldkjf879lkjsfd987
            ^^^^^
            sldjf
                    ^^^^^^^
                    lsdjjkf
                              ^^^^^^
                              sldkjf
                                       ^^^^^^
                                       lkjsfd
        """
        if not self.streamlined:
            self.streamline()
        for e in self.ignoreExprs:
            e.streamline()

        if not self.keepTabs:
            instring = _ustr(instring).expandtabs()
        instrlen = len(instring)
        loc = 0
        # bind the bound methods to locals once, ahead of the scanning loop
        preparseFn = self.preParse
        parseFn = self._parse
        ParserElement.resetCache()
        matches = 0
        try:
            while loc <= instrlen and matches < maxMatches:
                try:
                    # pre-parse explicitly, then parse with callPreParse=False so that
                    # preloc is known and can be reported as the match start
                    preloc = preparseFn( instring, loc )
                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
                except ParseException:
                    # no match here; retry one character past the pre-parsed location
                    loc = preloc+1
                else:
                    if nextLoc > loc:
                        matches += 1
                        yield tokens, preloc, nextLoc
                        if overlap:
                            # NOTE(review): nextloc (lowercase) is a separate variable from
                            # nextLoc - it is the pre-parse location computed from loc; when
                            # pre-parsing advanced past loc the scan resumes at the end of the
                            # match (nextLoc), otherwise it advances by a single character
                            nextloc = preparseFn( instring, loc )
                            if nextloc > loc:
                                loc = nextLoc
                            else:
                                loc += 1
                        else:
                            loc = nextLoc
                    else:
                        # the match did not advance beyond loc; step forward to guarantee progress
                        loc = preloc+1
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            else:
                # catch and re-raise exception from here, clears out pyparsing internal stack trace
                raise exc
1691
1692    def transformString( self, instring ):
1693        """
1694        Extension to C{L{scanString}}, to modify matching text with modified tokens that may
1695        be returned from a parse action.  To use C{transformString}, define a grammar and
1696        attach a parse action to it that modifies the returned token list.
1697        Invoking C{transformString()} on a target string will then scan for matches,
1698        and replace the matched text patterns according to the logic in the parse
1699        action.  C{transformString()} returns the resulting transformed string.
1700
1701        Example::
1702            wd = Word(alphas)
1703            wd.setParseAction(lambda toks: toks[0].title())
1704
1705            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
1706        Prints::
1707            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
1708        """
1709        out = []
1710        lastE = 0
1711        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
1712        # keep string locs straight between transformString and scanString
1713        self.keepTabs = True
1714        try:
1715            for t,s,e in self.scanString( instring ):
1716                out.append( instring[lastE:s] )
1717                if t:
1718                    if isinstance(t,ParseResults):
1719                        out += t.asList()
1720                    elif isinstance(t,list):
1721                        out += t
1722                    else:
1723                        out.append(t)
1724                lastE = e
1725            out.append(instring[lastE:])
1726            out = [o for o in out if o]
1727            return "".join(map(_ustr,_flatten(out)))
1728        except ParseBaseException as exc:
1729            if ParserElement.verbose_stacktrace:
1730                raise
1731            else:
1732                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1733                raise exc
1734
1735    def searchString( self, instring, maxMatches=_MAX_INT ):
1736        """
1737        Another extension to C{L{scanString}}, simplifying the access to the tokens found
1738        to match the given parse expression.  May be called with optional
1739        C{maxMatches} argument, to clip searching after 'n' matches are found.
1740
1741        Example::
1742            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
1743            cap_word = Word(alphas.upper(), alphas.lower())
1744
1745            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
1746        prints::
1747            ['More', 'Iron', 'Lead', 'Gold', 'I']
1748        """
1749        try:
1750            return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1751        except ParseBaseException as exc:
1752            if ParserElement.verbose_stacktrace:
1753                raise
1754            else:
1755                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1756                raise exc
1757
1758    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
1759        """
1760        Generator method to split a string using the given expression as a separator.
1761        May be called with optional C{maxsplit} argument, to limit the number of splits;
1762        and the optional C{includeSeparators} argument (default=C{False}), if the separating
1763        matching text should be included in the split results.
1764
1765        Example::
1766            punc = oneOf(list(".,;:/-!?"))
1767            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
1768        prints::
1769            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
1770        """
1771        splits = 0
1772        last = 0
1773        for t,s,e in self.scanString(instring, maxMatches=maxsplit):
1774            yield instring[last:s]
1775            if includeSeparators:
1776                yield t[0]
1777            last = e
1778        yield instring[last:]
1779
1780    def __add__(self, other ):
1781        """
1782        Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
1783        converts them to L{Literal}s by default.
1784
1785        Example::
1786            greet = Word(alphas) + "," + Word(alphas) + "!"
1787            hello = "Hello, World!"
1788            print (hello, "->", greet.parseString(hello))
1789        Prints::
1790            Hello, World! -> ['Hello', ',', 'World', '!']
1791        """
1792        if isinstance( other, basestring ):
1793            other = ParserElement._literalStringClass( other )
1794        if not isinstance( other, ParserElement ):
1795            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1796                    SyntaxWarning, stacklevel=2)
1797            return None
1798        return And( [ self, other ] )
1799
1800    def __radd__(self, other ):
1801        """
1802        Implementation of + operator when left operand is not a C{L{ParserElement}}
1803        """
1804        if isinstance( other, basestring ):
1805            other = ParserElement._literalStringClass( other )
1806        if not isinstance( other, ParserElement ):
1807            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1808                    SyntaxWarning, stacklevel=2)
1809            return None
1810        return other + self
1811
1812    def __sub__(self, other):
1813        """
1814        Implementation of - operator, returns C{L{And}} with error stop
1815        """
1816        if isinstance( other, basestring ):
1817            other = ParserElement._literalStringClass( other )
1818        if not isinstance( other, ParserElement ):
1819            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1820                    SyntaxWarning, stacklevel=2)
1821            return None
1822        return And( [ self, And._ErrorStop(), other ] )
1823
1824    def __rsub__(self, other ):
1825        """
1826        Implementation of - operator when left operand is not a C{L{ParserElement}}
1827        """
1828        if isinstance( other, basestring ):
1829            other = ParserElement._literalStringClass( other )
1830        if not isinstance( other, ParserElement ):
1831            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1832                    SyntaxWarning, stacklevel=2)
1833            return None
1834        return other - self
1835
1836    def __mul__(self,other):
1837        """
1838        Implementation of * operator, allows use of C{expr * 3} in place of
1839        C{expr + expr + expr}.  Expressions may also me multiplied by a 2-integer
1840        tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
1841        may also include C{None} as in:
1842         - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1843              to C{expr*n + L{ZeroOrMore}(expr)}
1844              (read as "at least n instances of C{expr}")
1845         - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1846              (read as "0 to n instances of C{expr}")
1847         - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1848         - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1849
1850        Note that C{expr*(None,n)} does not raise an exception if
1851        more than n exprs exist in the input stream; that is,
1852        C{expr*(None,n)} does not enforce a maximum number of expr
1853        occurrences.  If this behavior is desired, then write
1854        C{expr*(None,n) + ~expr}
1855        """
1856        if isinstance(other,int):
1857            minElements, optElements = other,0
1858        elif isinstance(other,tuple):
1859            other = (other + (None, None))[:2]
1860            if other[0] is None:
1861                other = (0, other[1])
1862            if isinstance(other[0],int) and other[1] is None:
1863                if other[0] == 0:
1864                    return ZeroOrMore(self)
1865                if other[0] == 1:
1866                    return OneOrMore(self)
1867                else:
1868                    return self*other[0] + ZeroOrMore(self)
1869            elif isinstance(other[0],int) and isinstance(other[1],int):
1870                minElements, optElements = other
1871                optElements -= minElements
1872            else:
1873                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1874        else:
1875            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1876
1877        if minElements < 0:
1878            raise ValueError("cannot multiply ParserElement by negative value")
1879        if optElements < 0:
1880            raise ValueError("second tuple value must be greater or equal to first tuple value")
1881        if minElements == optElements == 0:
1882            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1883
1884        if (optElements):
1885            def makeOptionalList(n):
1886                if n>1:
1887                    return Optional(self + makeOptionalList(n-1))
1888                else:
1889                    return Optional(self)
1890            if minElements:
1891                if minElements == 1:
1892                    ret = self + makeOptionalList(optElements)
1893                else:
1894                    ret = And([self]*minElements) + makeOptionalList(optElements)
1895            else:
1896                ret = makeOptionalList(optElements)
1897        else:
1898            if minElements == 1:
1899                ret = self
1900            else:
1901                ret = And([self]*minElements)
1902        return ret
1903
    def __rmul__(self, other):
        """
        Implementation of * operator when left operand is not a C{L{ParserElement}};
        delegates to C{L{__mul__}}, so C{3 * expr} gives the same result as C{expr * 3}.
        """
        return self.__mul__(other)
1906
1907    def __or__(self, other ):
1908        """
1909        Implementation of | operator - returns C{L{MatchFirst}}
1910        """
1911        if isinstance( other, basestring ):
1912            other = ParserElement._literalStringClass( other )
1913        if not isinstance( other, ParserElement ):
1914            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1915                    SyntaxWarning, stacklevel=2)
1916            return None
1917        return MatchFirst( [ self, other ] )
1918
1919    def __ror__(self, other ):
1920        """
1921        Implementation of | operator when left operand is not a C{L{ParserElement}}
1922        """
1923        if isinstance( other, basestring ):
1924            other = ParserElement._literalStringClass( other )
1925        if not isinstance( other, ParserElement ):
1926            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1927                    SyntaxWarning, stacklevel=2)
1928            return None
1929        return other | self
1930
1931    def __xor__(self, other ):
1932        """
1933        Implementation of ^ operator - returns C{L{Or}}
1934        """
1935        if isinstance( other, basestring ):
1936            other = ParserElement._literalStringClass( other )
1937        if not isinstance( other, ParserElement ):
1938            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1939                    SyntaxWarning, stacklevel=2)
1940            return None
1941        return Or( [ self, other ] )
1942
1943    def __rxor__(self, other ):
1944        """
1945        Implementation of ^ operator when left operand is not a C{L{ParserElement}}
1946        """
1947        if isinstance( other, basestring ):
1948            other = ParserElement._literalStringClass( other )
1949        if not isinstance( other, ParserElement ):
1950            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1951                    SyntaxWarning, stacklevel=2)
1952            return None
1953        return other ^ self
1954
1955    def __and__(self, other ):
1956        """
1957        Implementation of & operator - returns C{L{Each}}
1958        """
1959        if isinstance( other, basestring ):
1960            other = ParserElement._literalStringClass( other )
1961        if not isinstance( other, ParserElement ):
1962            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1963                    SyntaxWarning, stacklevel=2)
1964            return None
1965        return Each( [ self, other ] )
1966
1967    def __rand__(self, other ):
1968        """
1969        Implementation of & operator when left operand is not a C{L{ParserElement}}
1970        """
1971        if isinstance( other, basestring ):
1972            other = ParserElement._literalStringClass( other )
1973        if not isinstance( other, ParserElement ):
1974            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1975                    SyntaxWarning, stacklevel=2)
1976            return None
1977        return other & self
1978
    def __invert__( self ):
        """
        Implementation of ~ operator - returns a C{L{NotAny}} constructed from this
        expression.
        """
        return NotAny( self )
1984
1985    def __call__(self, name=None):
1986        """
1987        Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
1988
1989        If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1990        passed as C{True}.
1991
1992        If C{name} is omitted, same as calling C{L{copy}}.
1993
1994        Example::
1995            # these are equivalent
1996            userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1997            userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1998        """
1999        if name is not None:
2000            return self.setResultsName(name)
2001        else:
2002            return self.copy()
2003
    def suppress( self ):
        """
        Suppresses the output of this C{ParserElement}; useful to keep punctuation from
        cluttering up returned output.  Returns a new C{L{Suppress}} constructed from
        this expression.
        """
        return Suppress( self )
2010
    def leaveWhitespace( self ):
        """
        Disables the skipping of whitespace before matching the characters in the
        C{ParserElement}'s defined pattern.  This is normally only used internally by
        the pyparsing module, but may be needed in some whitespace-sensitive grammars.

        Sets C{skipWhitespace} to False and returns C{self}, so calls can be chained.
        """
        self.skipWhitespace = False
        return self
2019
2020    def setWhitespaceChars( self, chars ):
2021        """
2022        Overrides the default whitespace chars
2023        """
2024        self.skipWhitespace = True
2025        self.whiteChars = chars
2026        self.copyDefaultWhiteChars = False
2027        return self
2028
    def parseWithTabs( self ):
        """
        Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
        Must be called before C{parseString} when the input grammar contains elements that
        match C{<TAB>} characters.

        Sets C{keepTabs} to True and returns C{self}, so calls can be chained.
        """
        self.keepTabs = True
        return self
2037
2038    def ignore( self, other ):
2039        """
2040        Define expression to be ignored (e.g., comments) while doing pattern
2041        matching; may be called repeatedly, to define multiple comment or other
2042        ignorable patterns.
2043
2044        Example::
2045            patt = OneOrMore(Word(alphas))
2046            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
2047
2048            patt.ignore(cStyleComment)
2049            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
2050        """
2051        if isinstance(other, basestring):
2052            other = Suppress(other)
2053
2054        if isinstance( other, Suppress ):
2055            if other not in self.ignoreExprs:
2056                self.ignoreExprs.append(other)
2057        else:
2058            self.ignoreExprs.append( Suppress( other.copy() ) )
2059        return self
2060
2061    def setDebugActions( self, startAction, successAction, exceptionAction ):
2062        """
2063        Enable display of debugging messages while doing pattern matching.
2064        """
2065        self.debugActions = (startAction or _defaultStartDebugAction,
2066                             successAction or _defaultSuccessDebugAction,
2067                             exceptionAction or _defaultExceptionDebugAction)
2068        self.debug = True
2069        return self
2070
2071    def setDebug( self, flag=True ):
2072        """
2073        Enable display of debugging messages while doing pattern matching.
2074        Set C{flag} to True to enable, False to disable.
2075
2076        Example::
2077            wd = Word(alphas).setName("alphaword")
2078            integer = Word(nums).setName("numword")
2079            term = wd | integer
2080
2081            # turn on debugging for wd
2082            wd.setDebug()
2083
2084            OneOrMore(term).parseString("abc 123 xyz 890")
2085
2086        prints::
2087            Match alphaword at loc 0(1,1)
2088            Matched alphaword -> ['abc']
2089            Match alphaword at loc 3(1,4)
2090            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
2091            Match alphaword at loc 7(1,8)
2092            Matched alphaword -> ['xyz']
2093            Match alphaword at loc 11(1,12)
2094            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
2095            Match alphaword at loc 15(1,16)
2096            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
2097
2098        The output shown is that produced by the default debug actions - custom debug actions can be
2099        specified using L{setDebugActions}. Prior to attempting
2100        to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
2101        is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
2102        message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
2103        which makes debugging and exception messages easier to understand - for instance, the default
2104        name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
2105        """
2106        if flag:
2107            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
2108        else:
2109            self.debug = False
2110        return self
2111
    def __str__( self ):
        """Return this element's C{name} attribute."""
        return self.name
2114
    def __repr__( self ):
        """Return C{_ustr(self)}, the string conversion of this element."""
        return _ustr(self)
2117
2118    def streamline( self ):
2119        self.streamlined = True
2120        self.strRepr = None
2121        return self
2122
    def checkRecursion( self, parseElementList ):
        """
        Base implementation: does nothing and ignores C{parseElementList}.
        Presumably overridden by expression classes that contain other
        expressions - confirm against the subclasses.
        """
        pass
2125
    def validate( self, validateTrace=[] ):
        """
        Check defined expressions for valid structure, check for infinite recursive definitions.

        This implementation only calls C{checkRecursion} with a fresh empty list;
        the C{validateTrace} argument is not used here.
        """
        self.checkRecursion( [] )
2131
2132    def parseFile( self, file_or_filename, parseAll=False ):
2133        """
2134        Execute the parse expression on the given file or filename.
2135        If a filename is specified (instead of a file object),
2136        the entire file is opened, read, and closed before parsing.
2137        """
2138        try:
2139            file_contents = file_or_filename.read()
2140        except AttributeError:
2141            with open(file_or_filename, "r") as f:
2142                file_contents = f.read()
2143        try:
2144            return self.parseString(file_contents, parseAll)
2145        except ParseBaseException as exc:
2146            if ParserElement.verbose_stacktrace:
2147                raise
2148            else:
2149                # catch and re-raise exception from here, clears out pyparsing internal stack trace
2150                raise exc
2151
2152    def __eq__(self,other):
2153        if isinstance(other, ParserElement):
2154            return self is other or vars(self) == vars(other)
2155        elif isinstance(other, basestring):
2156            return self.matches(other)
2157        else:
2158            return super(ParserElement,self)==other
2159
    def __ne__(self,other):
        """Inequality test, defined as the negation of C{self == other}."""
        return not (self == other)
2162
    def __hash__(self):
        """Hash on object identity (C{id(self)}), independent of the element's attributes."""
        return hash(id(self))
2165
    def __req__(self,other):
        """
        Return C{self == other}.
        NOTE(review): C{__req__} is not a special method name in Python's data
        model, so this is only reached when called explicitly.
        """
        return self == other
2168
    def __rne__(self,other):
        """
        Return the negation of C{self == other}.
        NOTE(review): C{__rne__} is not a special method name in Python's data
        model, so this is only reached when called explicitly.
        """
        return not (self == other)
2171
2172    def matches(self, testString, parseAll=True):
2173        """
2174        Method for quick testing of a parser against a test string. Good for simple
2175        inline microtests of sub expressions while building up larger parser.
2176
2177        Parameters:
2178         - testString - to test against this expression for a match
2179         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
2180
2181        Example::
2182            expr = Word(nums)
2183            assert expr.matches("100")
2184        """
2185        try:
2186            self.parseString(_ustr(testString), parseAll=parseAll)
2187            return True
2188        except ParseBaseException:
2189            return False
2190
2191    def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
2192        """
2193        Execute the parse expression on a series of test strings, showing each
2194        test, the parsed results or where the parse failed. Quick and easy way to
2195        run a parse expression against a list of sample strings.
2196
2197        Parameters:
2198         - tests - a list of separate test strings, or a multiline string of test strings
2199         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
2200         - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
2201              string; pass None to disable comment filtering
2202         - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
2203              if False, only dump nested list
2204         - printResults - (default=C{True}) prints test output to stdout
2205         - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
2206
2207        Returns: a (success, results) tuple, where success indicates that all tests succeeded
2208        (or failed if C{failureTests} is True), and the results contain a list of lines of each
2209        test's output
2210
2211        Example::
2212            number_expr = pyparsing_common.number.copy()
2213
2214            result = number_expr.runTests('''
2215                # unsigned integer
2216                100
2217                # negative integer
2218                -100
2219                # float with scientific notation
2220                6.02e23
2221                # integer with scientific notation
2222                1e-12
2223                ''')
2224            print("Success" if result[0] else "Failed!")
2225
2226            result = number_expr.runTests('''
2227                # stray character
2228                100Z
2229                # missing leading digit before '.'
2230                -.100
2231                # too many '.'
2232                3.14.159
2233                ''', failureTests=True)
2234            print("Success" if result[0] else "Failed!")
2235        prints::
2236            # unsigned integer
2237            100
2238            [100]
2239
2240            # negative integer
2241            -100
2242            [-100]
2243
2244            # float with scientific notation
2245            6.02e23
2246            [6.02e+23]
2247
2248            # integer with scientific notation
2249            1e-12
2250            [1e-12]
2251
2252            Success
2253
2254            # stray character
2255            100Z
2256               ^
2257            FAIL: Expected end of text (at char 3), (line:1, col:4)
2258
2259            # missing leading digit before '.'
2260            -.100
2261            ^
2262            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
2263
2264            # too many '.'
2265            3.14.159
2266                ^
2267            FAIL: Expected end of text (at char 4), (line:1, col:5)
2268
2269            Success
2270
2271        Each test string must be on a single line. If you want to test a string that spans multiple
2272        lines, create a test like this::
2273
2274            expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
2275
2276        (Note that this is a raw string literal, you must include the leading 'r'.)
2277        """
2278        if isinstance(tests, basestring):
2279            tests = list(map(str.strip, tests.rstrip().splitlines()))
2280        if isinstance(comment, basestring):
2281            comment = Literal(comment)
2282        allResults = []
2283        comments = []
2284        success = True
2285        for t in tests:
2286            if comment is not None and comment.matches(t, False) or comments and not t:
2287                comments.append(t)
2288                continue
2289            if not t:
2290                continue
2291            out = ['\n'.join(comments), t]
2292            comments = []
2293            try:
2294                t = t.replace(r'\n','\n')
2295                result = self.parseString(t, parseAll=parseAll)
2296                out.append(result.dump(full=fullDump))
2297                success = success and not failureTests
2298            except ParseBaseException as pe:
2299                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
2300                if '\n' in t:
2301                    out.append(line(pe.loc, t))
2302                    out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
2303                else:
2304                    out.append(' '*pe.loc + '^' + fatal)
2305                out.append("FAIL: " + str(pe))
2306                success = success and failureTests
2307                result = pe
2308            except Exception as exc:
2309                out.append("FAIL-EXCEPTION: " + str(exc))
2310                success = success and failureTests
2311                result = exc
2312
2313            if printResults:
2314                if fullDump:
2315                    out.append('')
2316                print('\n'.join(out))
2317
2318            allResults.append((t, result))
2319
2320        return success, allResults
2321
2322
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass that serves as the base class for
    atomic matching patterns.
    """
    def __init__(self):
        # tokens are always constructed with savelist turned off
        super(Token, self).__init__(savelist=False)
2329
2330
class Empty(Token):
    """
    A token that matches nothing at all, and therefore always matches.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        # an empty match is expected
        self.mayIndexError = False
        self.mayReturnEmpty = True
2340
2341
class NoMatch(Token):
    """
    A token that can never be matched; parsing it always fails.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)
2355
2356
class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        if matchString:
            self.firstMatchChar = matchString[0]
        else:
            # an empty literal can only ever behave like Empty, so become one
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine is called very frequently, so the first
    # character is compared before anything else, and startswith is skipped
    # entirely for single-character match strings.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

# short alias for Literal
_L = Literal
# class used by ParserElement for its literal-string class setting
ParserElement._literalStringClass = Literal
2397
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must not be
    immediately preceded or followed by a keyword (identifier) character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done in upper case
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # in both branches the keyword text must match at loc, and neither the
        # character after the match nor the character before loc may be an
        # identifier character (start/end of the input string are acceptable)
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps the same identifier characters.
        """
        c = super(Keyword,self).copy()
        # give the copy its own set holding this keyword's identChars, instead of
        # overwriting them with the class-level default string (which discarded any
        # custom identChars and changed the attribute from a set to a str)
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2462
class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the matched results are always reported in the case of the
    defining match string, NOT in the case found in the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the base Literal stores the upper-cased text used for comparisons
        super(CaselessLiteral, self).__init__(matchString.upper())
        # keep the literal exactly as given, to be returned on a match
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2485
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Matching is performed by the inherited C{Keyword.parseImpl}, using its caseless
    branch, so that C{CaselessKeyword(s)} behaves the same as C{Keyword(s, caseless=True)}:
    the keyword must be neither immediately preceded nor immediately followed by an
    identifier character.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__( self, matchString, identChars=None ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    # NOTE: parseImpl is intentionally not overridden here. A separate override
    # omitted the check of the character preceding the match, which let the
    # keyword match in the middle of a longer identifier.
2503
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        start = loc
        # location just past a candidate match of the full match_string length
        maxloc = start + len(self.match_string)

        # only attempt a match if enough input remains to hold the whole match string
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], self.match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # loop finished without exceeding the mismatch limit. The end location
                # must be expressed relative to instring (start + match length), not
                # relative to the match string, or matches starting at loc != 0 would
                # return the wrong location and matched text.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = self.match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2563
2564
class Word(Token):
    """
    Token for matching words made up of characters from allowed character sets.
    A C{Word} is defined with a string of all allowed initial characters,
    an optional string of allowed body characters (if omitted, the initial
    character set is used for the body too), and an optional minimum,
    maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction. An optional
    C{excludeChars} parameter can list characters that might be found in
    the input C{bodyChars} string; useful to define a word of all printables
    except for one or two characters, for instance.

    L{srange} is useful for defining custom character set strings for defining
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} uses the string argument to define
    I{sets} of matchable characters. This expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        # a missing (or empty) body character set falls back to the initial characters
        if not bodyChars:
            bodyChars = initChars
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        # an exact length, when given, takes precedence over min and max
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # with default length limits and no space among the allowed characters,
        # matching is delegated to an equivalent compiled regular expression
        defaultLengths = (min==1 and max==0 and exact==0)
        if defaultLengths and ' ' not in self.initCharsOrig+self.bodyCharsOrig:
            if self.bodyCharsOrig == self.initCharsOrig:
                pattern = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                pattern = "%s[%s]*" % (re.escape(self.initCharsOrig),
                                       _escapeRegexRangeChars(self.bodyCharsOrig))
            else:
                pattern = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
                                         _escapeRegexRangeChars(self.bodyCharsOrig))
            if self.asKeyword:
                pattern = r"\b" + pattern + r"\b"
            self.reString = pattern
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # if the pattern will not compile, use the character-by-character matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        # fast path: a regular expression was compiled for this word
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)
            return result.end(), result.group()

        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond the maximum length or the end of the input
        maxloc = min( start + self.maxLen, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        tooShort = loc - start < self.minLen
        # with an explicit max, a further body character right after the match is a failure
        tooLong = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
        # as a keyword, the word may not be adjacent to body characters on either side
        notKeyword = self.asKeyword and (
            (start>0 and instring[start-1] in bodychars) or
            (loc<instrlen and instring[loc] in bodychars) )

        if tooShort or tooLong or notKeyword:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except Exception:
            # base class could not supply a string; build a default representation below
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first 4 characters
                return s[:4]+"..." if len(s)>4 else s

            if self.initCharsOrig == self.bodyCharsOrig:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
            else:
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )

        return self.strRepr
2722
2723
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    # type of a compiled regular expression object, used to recognize precompiled patterns
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already-compiled regular expression object, which is
        used directly. Any other type raises C{ValueError}; an invalid pattern string
        issues a C{SyntaxWarning} and re-raises the compile error."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the source text of the compiled expression; str(pattern) would
            # give the repr of the pattern object instead of the regular expression,
            # which then leaked into this element's name and error messages
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        # expose any named groups of the regular expression as named results
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except Exception:
            # base class could not supply a string; build a default representation below
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2795
2796
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # The pattern is: opening quote, then any number of "ordinary" characters
        # or escape alternatives, then the closing quote.  Ordinary characters
        # exclude the first end-quote character, the escape character (if any),
        # and - unless multiline - line-ending characters.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            excludedLineEnds = ''
        else:
            self.flags = 0
            excludedLineEnds = r'\n\r'
        escCharRange = _escapeRegexRangeChars(escChar) if escChar is not None else ''
        self.pattern = r'%s(?:[^%s%s%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              excludedLineEnds,
              escCharRange )
        if len(self.endQuoteChar) > 1:
            # for a multi-character end quote, also accept any proper prefix of the
            # end quote that is followed by a character that does not continue it
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # cheap first-character test before running the regular expression
        result = self.re.match(instring,loc) if instring[loc] == self.firstQuoteChar else None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in an ordinary string literal
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # base class could not supply a string; build a default representation below
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2932
2933
class CharsNotIn(Token):
    """
    Token for matching a run of characters, none of which appear in a given
    exclusion set. Whitespace is matched like any other character unless it is
    listed in the exclusion set (see example), since this token does not skip
    leading whitespace. Construct with a string holding every disallowed
    character, plus optional C{min}, C{max}, and/or C{exact} lengths. C{min}
    defaults to 1 (a value < 1 raises C{ValueError}); C{max} and C{exact}
    default to 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        # an exact length overrides both bounds; otherwise max <= 0 means "no upper limit"
        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume characters starting at C{loc} until an excluded character, the
        maximum length, or the end of C{instring} is reached. Raises
        C{ParseException} if the first character is excluded or fewer than
        C{minLen} characters were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # never scan beyond the end of the input or the length cap
        limit = min( begin+self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__( self ):
        """
        Return the base-class string if available, else a cached C{!W:(...)}
        label showing at most the first four excluded characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            shown = self.notChars
            if len(shown) > 4:
                template = "!W:(%s...)"
                shown = shown[:4]
            else:
                template = "!W:(%s)"
            self.strRepr = template % shown

        return self.strRepr
3004
class White(Token):
    """
    Special matching class for matching whitespace.  Pyparsing grammars normally
    skip over whitespace; use this class where a whitespace structure is itself
    significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # display labels used to build the expression name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # keep skipping only those whitespace characters we are not asked to match
        stillSkipped = [c for c in self.whiteChars if c not in ws]
        self.setWhitespaceChars( "".join(stillSkipped) )
        self.name = "".join(White.whiteStrs[c] for c in ws)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        # an exact length overrides both bounds; otherwise max <= 0 means "no upper limit"
        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume a run of characters from C{matchWhite} starting at C{loc}.
        Raises C{ParseException} if the first character is not matchable or
        the run is shorter than C{minLen}.
        """
        matchable = self.matchWhite
        if instring[loc] not in matchable:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # never scan beyond the end of the input or the length cap
        limit = min( begin + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in matchable:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
3054
3055
class _PositionToken(Token):
    """
    Shared base class for the position-oriented tokens defined below it
    (C{GoToColumn}, C{LineStart}, C{StringEnd}, ...).  Names the element after
    its concrete class, sets C{mayReturnEmpty} and clears C{mayIndexError}.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name = self.__class__.__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
3062
class GoToColumn(_PositionToken):
    """
    Token that advances the parse position to a given column of the input text;
    useful for tabular report scraping.
    """
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """
        Return C{loc} unchanged when already at the target column; otherwise
        skip ignorable expressions (if any are registered) and then whitespace
        until the target column, a non-space character, or the end of input.
        """
        if col(loc,instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the text between C{loc} and the target column, together with the
        new location.  Raises C{ParseException} if C{loc} is already past the
        target column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
3087
3088
class LineStart(_PositionToken):
    """
    Matches only when the current position is in column 1, i.e. at the
    beginning of a line within the parse string

    Example::

        test = '''\\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc, [])} at column 1; raise C{ParseException} anywhere else."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3118
class LineEnd(_PositionToken):
    """
    Matches when the current position is at the end of a line within the parse
    string: either on a newline character or exactly at the end of the input.
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline must not be skipped as whitespace, or it could never be matched here
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return C{(loc+1, "\\n")} when positioned on a newline, C{(loc+1, [])}
        when positioned exactly at the end of C{instring}; raise
        C{ParseException} otherwise.
        """
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3138
class StringStart(_PositionToken):
    """
    Matches only when the current position is at the beginning of the parse string
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed at offset 0, or at the offset that C{preParse} reaches from
        offset 0 (i.e. only whitespace and ignorables precede C{loc}); raise
        C{ParseException} otherwise.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3153
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Raise C{ParseException} while input remains at C{loc}.  Exactly at the
        end of C{instring} return C{(loc+1, [])}; when C{loc} is already past
        the end return C{(loc, [])} unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            # step past the end so a repeated end-of-text test does not advance again
            return loc+1, []
        # loc > end: the three comparisons are exhaustive, so no further branch is needed
        return loc, []
3171
class WordStart(_PositionToken):
    """
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed at offset 0 unconditionally; elsewhere require that the previous
        character is not a word character and the current one is.
        """
        if loc == 0:
            return loc, []

        chars = self.wordChars
        if instring[loc-1] in chars or instring[loc] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3191
class WordEnd(_PositionToken):
    """
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed when C{loc} is at or past the end of C{instring} (including an
        empty string).  Inside the string, require that the previous character
        is a word character and the current one is not; raise
        C{ParseException} otherwise.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # At loc == 0 there is no preceding character, so no word can end here.
            # Without this guard instring[loc-1] would wrap around to the LAST
            # character of the input and make the result depend on how the text ends.
            if (loc == 0 or
                instring[loc] in self.wordChars or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3213
3214
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    Holds its contained expressions in the list C{self.exprs}.
    """
    def __init__( self, exprs, savelist = False ):
        """
        Store C{exprs} as the list C{self.exprs}.

        C{exprs} may be a single string (wrapped with
        C{ParserElement._literalStringClass}), a generator or other iterable
        of expressions, an iterable consisting only of strings (each one is
        wrapped), or any other single object, which becomes a one-element list
        if it cannot be converted with C{list()}.
        """
        super(ParseExpression,self).__init__(savelist)

        # The Iterable ABC lives in collections.abc on Python 3; the old
        # collections.Iterable alias was removed in Python 3.10.  Fall back to
        # the collections module for Python 2, which has no collections.abc.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement._literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) at index or slice C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Append C{other} to the contained expressions, reset the cached string, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the change does not leak into expressions shared with other grammars
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression through the base class and
        pass the newly registered entry on to every contained expression.  A
        C{Suppress} instance already present in C{ignoreExprs} is not added again.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseExpression, self).ignore( other )
                for e in self.exprs:
                    e.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseExpression, self).ignore( other )
            for e in self.exprs:
                e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """
        Return the base-class string if available, else a cached
        C{ClassName:(exprs)} description.
        """
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """
        Streamline this expression and every contained expression, flatten
        directly nested expressions of the same class where that is safe,
        refresh C{errmsg}, and return C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            # same treatment for a nested expression in the last position
            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate unchanged to the base-class C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """
        Validate every contained expression, passing along a copy of
        C{validateTrace} extended with C{self}, then run C{checkRecursion}.
        The default list argument is only ever copied, never mutated.
        """
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a base-class copy of this expression whose contained expressions are copied as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3326
class And(ParseExpression):
    """
    Matches only if every given C{ParseExpression} is found, in the order given.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}; C{And.parseImpl} switches to raising
        C{ParseSyntaxException} for failures in the expressions that follow it."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling of the whole sequence follows its first expression
        leading = self.exprs[0]
        self.setWhitespaceChars( leading.whiteChars )
        self.skipWhitespace = leading.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse each contained expression in turn, accumulating their tokens.
        After an C{_ErrorStop} marker has been seen, a failure of a later
        expression is re-raised as C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        pastErrorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                pastErrorStop = True
                continue

            if not pastErrorStop:
                loc, exprtokens = e._parse( instring, loc, doActions )
            else:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Support C{expr += other}: strings are wrapped as literals, then appended in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def checkRecursion( self, parseElementList ):
        """
        Run C{checkRecursion} on the contained expressions, stopping after the
        first one whose C{mayReturnEmpty} is false.
        """
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return C{name} if set, else a cached C{{a b c}} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{%s}" % " ".join(_ustr(e) for e in self.exprs)

        return self.strRepr
3401
3402
class Or(ParseExpression):
    """
    Matches if at least one of the given C{ParseExpression}s is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # with no alternatives at all, mayReturnEmpty is set to True
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try every alternative with C{tryParse}, then fully parse the successful
        ones from longest match to shortest and return the first that
        succeeds.  If none succeeds, raise the exception that got furthest
        into the input, with its message replaced by C{self.errmsg}.
        """
        furthestLoc = -1
        furthestExc = None
        candidates = []
        for e in self.exprs:
            try:
                endLoc = e.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc, furthestLoc = err, err.loc
            except IndexError:
                inputLen = len(instring)
                if inputLen > furthestLoc:
                    furthestExc = ParseException(instring,inputLen,e.errmsg,self)
                    furthestLoc = inputLen
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, e))

        # stable sort: longest match first, ties keep definition order
        candidates.sort(key=lambda pair: -pair[0])
        for _,e in candidates:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc, furthestLoc = err, err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc


    def __ixor__(self, other ):
        """Support C{expr ^= other}: strings are wrapped as literals, then appended in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return C{name} if set, else a cached C{{a ^ b ^ c}} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{%s}" % " ^ ".join(_ustr(e) for e in self.exprs)

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run C{checkRecursion} on every alternative, with C{self} added to the element trail."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
3480
3481
class MatchFirst(ParseExpression):
    """
    Matches if at least one of the given C{ParseExpression}s is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        # with no alternatives at all, mayReturnEmpty is set to True
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the result of the first alternative that parses at C{loc}.  If
        none does, raise the exception that got furthest into the input, with
        its message replaced by C{self.errmsg}.
        """
        furthestLoc = -1
        furthestExc = None
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestExc, furthestLoc = err, err.loc
            except IndexError:
                inputLen = len(instring)
                if inputLen > furthestLoc:
                    furthestExc = ParseException(instring,inputLen,e.errmsg,self)
                    furthestLoc = inputLen

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other ):
        """Support C{expr |= other}: strings are wrapped as literals, then appended in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return C{name} if set, else a cached C{{a | b | c}} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{%s}" % " | ".join(_ustr(e) for e in self.exprs)

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run C{checkRecursion} on every alternative, with C{self} added to the element trail."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
3548
3549
class Each(ParseExpression):
    """
    Matches only if all of the given C{ParseExpression}s are found, in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the required/optional groupings are built on the first call to parseImpl
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        First determine, with C{tryParse}, an order in which the contained
        expressions match starting at C{loc}; then parse them for real in that
        order and return the summed results.  Raises C{ParseException} if any
        required expression was never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            wrappedOptionals = [ e for e in self.exprs if isinstance(e,Optional) ]
            # lets us recover the Optional wrapper from the id of its inner expression
            self.opt1map = dict((id(e.expr),e) for e in wrappedOptionals)
            emptyOk = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = [ e.expr for e in wrappedOptionals ] + emptyOk
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt  = self.optionals[:]
        matchOrder = []

        # keep sweeping over the outstanding expressions until a whole sweep matches nothing
        while True:
            attempts = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for e in attempts:
                try:
                    scanLoc = e.tryParse( instring, scanLoc )
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(self.opt1map.get(id(e),e))
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            if failures == len(attempts):
                break

        if pendingReqd:
            missing = ", ".join(_ustr(e) for e in pendingReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

        # second pass: parse for real, in the order established above
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = sum(resultlist, ParseResults([]))
        return loc, finalResults

    def __str__( self ):
        """Return C{name} if set, else a cached C{{a & b & c}} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{%s}" % " & ".join(_ustr(e) for e in self.exprs)

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run C{checkRecursion} on every contained expression, with C{self} added to the element trail."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
3672
3673
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
    Wraps a single contained expression, stored in C{self.expr} (which may be C{None}).
    """
    def __init__( self, expr, savelist=False ):
        """
        Store C{expr} as the wrapped expression.  A string is converted to a
        literal expression first.  When C{expr} is not C{None}, its index-error,
        empty-match, whitespace, save-as-list, pre-parse and ignore settings
        are copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Delegate parsing to the wrapped expression; raise C{ParseException}
        when no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            # pass the real input string so the exception carries the text being parsed
            raise ParseException(instring,loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Disable whitespace skipping on this element and, if an expression is
        wrapped, on a private copy of it.  Returns C{self}.
        """
        self.skipWhitespace = False
        # check for None before copying - there may be no wrapped expression yet
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression through the base class and
        pass the newly registered entry on to the wrapped expression, if any.
        A C{Suppress} instance already present in C{ignoreExprs} is not added again.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression, if any; return C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """
        Validate the wrapped expression, passing along a copy of
        C{validateTrace} extended with C{self}, then run C{checkRecursion}.
        The default list argument is only ever copied, never mutated.
        """
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """
        Return the base-class string if available, else a cached
        C{ClassName:(expr)} description built once an expression is set.
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
3749
3750
class FollowedBy(ParseElementEnhance):
    """
    Lookahead matching of the given parse expression.  C{FollowedBy}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # a lookahead consumes nothing, so it may match without returning tokens
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # tryParse raises if the lookahead fails; its result is discarded so
        # the position handed back is the one we were given
        self.expr.tryParse( instring, loc )
        no_tokens = []
        return loc, no_tokens
3775
3776
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression does I{not} match at the current
    position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator.

    Example::
        # match a word only if the literal "end" does not match at that position
        not_end_word = ~Literal("end") + Word(alphas)
    """
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # only this element stops skipping whitespace; leaveWhitespace() is
        # deliberately NOT used, since it would propagate to the wrapped exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        unwanted_found = self.expr.canParseNext(instring, loc)
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__( self ):
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr

        return self.name
3808
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation for repetition expressions: matches the wrapped
    expression once, then keeps matching it until it fails or the optional
    C{stopOn} sentinel matches.
    """
    def __init__( self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # store the negated sentinel, so that "sentinel matches" surfaces as a parse failure
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl( self, instring, loc, doActions=True ):
        # bind frequently used callables to locals for the repetition loop
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse
        else:
            reject_sentinel = None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_expr( instring, loc, doActions, callPreParse=False )
        try:
            hasIgnoreExprs = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                preloc = skip_ignorables( instring, loc ) if hasIgnoreExprs else loc
                loc, tmptokens = parse_expr( instring, preloc, doActions )
                if tmptokens or tmptokens.haskeys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            # the first failed repetition (or a sentinel match) ends the loop
            pass

        return loc, tokens
3846
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__( self ):
        # an explicitly assigned name takes precedence over the generated form
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr

        return self.name
3881
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            result = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            # zero repetitions is still a match: stay put, return no tokens
            return loc, []
        return result

    def __str__( self ):
        # an explicitly assigned name takes precedence over the generated form
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr

        return self.name
3912
3913class _NullToken(object):
3914    def __bool__(self):
3915        return False
3916    __nonzero__ = __bool__
3917    def __str__(self):
3918        return ""
3919
3920_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that must match zero or one time
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional,self).__init__( expr, savelist=False )
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            # no match: position is unchanged, tokens come from the default (if any)
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # keep the default reachable under the wrapped expression's results name
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        # an explicitly assigned name takes precedence over the generated form
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr

        return self.name
3984
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is promoted to the configured literal class
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        startloc = loc
        instrlen = len(instring)
        target_parse = self.expr._parse
        if self.failOn is not None:
            fail_on_matches = self.failOn.canParseNext
        else:
            fail_on_matches = None
        if self.ignoreExpr is not None:
            skip_ignored = self.ignoreExpr.tryParse
        else:
            skip_ignored = None

        scanloc = loc
        while scanloc <= instrlen:
            # NOTE(review): a failOn match leaves the loop via break, which skips
            # the while/else failure below, so the text scanned so far is returned
            # as a match - confirm this is the intended meaning of failOn
            if fail_on_matches is not None and fail_on_matches(instring, scanloc):
                break

            if skip_ignored is not None:
                # advance past ignore expressions
                while True:
                    try:
                        scanloc = skip_ignored(instring, scanloc)
                    except ParseBaseException:
                        break

            try:
                target_parse(instring, scanloc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                scanloc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = scanloc
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            # parse the target for real (with actions) and append its tokens
            loc, mat = target_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
4099
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Assign C{other} as the expression this C{Forward} stands for and copy
        its parsing attributes onto this element.  A plain string is first
        converted with the configured literal string class.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # discard any cached string form built for a previous expression
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of '<<'; same effect, returns C{self}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the wrapped
        expression is not touched.  Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once; the C{streamlined} flag is
        set before descending, so a repeated call does nothing.  Returns C{self}."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression unless this element is already in
        C{validateTrace}, then run the recursion check for this element."""
        if self not in validateTrace:
            # the default list is only ever copied, never modified
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if there is one, else C{"<ClassName>: ..."}."""
        if hasattr(self,"name"):
            return self.name
        # The wrapped expression is intentionally not expanded here; the
        # expanding implementation was stubbed out because it created memory
        # and performance problems, and its unreachable remains are removed.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  While no expression has been assigned
        yet, the result is a new C{Forward} that wraps this one, so that a
        later assignment to the original is still seen through the copy.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4180
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the fixed placeholder C{"..."}."""
    def __str__( self ):
        placeholder = "..."
        return placeholder
4184
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # savelist is accepted for interface compatibility but is not forwarded;
        # converters start out with saveAsList switched off
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
4192
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        # with adjacent matching the ignorable is registered on this element
        # only, bypassing the propagating ParseElementEnhance implementation
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # start from a copy so named results survive, then swap the token
        # list for the single combined string
        retToks = tokenlist.copy()
        del retToks[:]
        combined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([ combined ], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [ retToks ]
        return retToks
4236
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the matched tokens one level deeper
        grouped = [ tokenlist ]
        return grouped
4257
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for offset, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            # the first token of each entry is its key; integer keys are stored as text
            key = entry[0]
            if isinstance(key,int):
                key = _ustr(entry[0]).strip()

            if entry_len == 1:
                # key with nothing after it
                value = ""
            elif entry_len == 2 and not isinstance(entry[1],ParseResults):
                # key followed by one plain token
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                if len(remainder)!= 1 or (isinstance(remainder,ParseResults) and remainder.haskeys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value,offset)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
4321
4322
class Suppress(TokenConverter):
    """
    Converter for ignoring the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse( self, instring, loc, tokenlist ):
        # discard whatever was matched
        nothing = []
        return nothing

    def suppress( self ):
        # this element already suppresses its results
        return self
4347
4348
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.

    After the wrapped action has completed once, further calls raise
    C{ParseException} until C{reset()} is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False
    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # flag only after success, so an action that raised may be retried
        self.called = True
        return results
    def reset(self):
        # allow the wrapped action to run again
        self.called = False
4364
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)
    def z(*paArgs):
        thisFunc = f.__name__
        # the last three arguments are always (string, location, tokens)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # a leading extra argument is taken to be the owning instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        trace = sys.stderr.write
        trace( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            trace( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        trace( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        # the wrapped callable has no __name__ to copy
        pass
    return z
4405
4406#
4407# global helpers
4408#
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
    if not combine:
        # delimiters take part in matching but are dropped from the results
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        # delimiters are kept and everything is merged into one token
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
4427
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # redefine the array expression for the count that was just parsed
        n = t[0]
        arrayExpr << (Group(And([expr]*n)) if n else Group(empty))
        # suppress the count token itself
        return []
    if intExpr is not None:
        # work on a copy so the caller's expression gains no extra parse action
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
4458
4459def _flatten(L):
4460    ret = []
4461    for i in L:
4462        if isinstance(i,list):
4463            ret.extend(_flatten(i))
4464        else:
4465            ret.append(i)
4466    return ret
4467
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            tflat = _flatten(t.asList())
            rep << And(Literal(tt) for tt in tflat)
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4495
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with its own copy of expr
    rep <<= expr.copy()
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, and make the repeater insist on the same tokens
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4523
def _escapeRegexRangeChars(s):
    """Escape the characters that are special inside a regex C{[]} range, and spell out newline/tab."""
    escaped = s
    # backslash must be handled first so the escapes added below are not doubled
    for special in ("\\", "^", "-", "]"):
        escaped = escaped.replace(special, _bslash + special)
    for literal, spelled in (("\n", r"\n"), ("\t", r"\t")):
        escaped = escaped.replace(literal, spelled)
    return _ustr(escaped)
4531
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    If C{strs} is neither a string nor an iterable, a C{SyntaxWarning} is issued.
    Whenever no symbols are found, a C{NoMatch} expression is returned.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # collections.Iterable was removed in Python 3.10; prefer collections.abc
    # and fall back to the old location for Python 2
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # drop duplicates, and move any symbol that is masked by an earlier prefix
    # (e.g. "<" before "<=") in front of that prefix so the longer one is tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position; advance to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4604
def dictOf( key, value ):
    """
    Helper that builds a dictionary-style expression from a key pattern and a value
    pattern, nesting the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} wrappers in the
    required order.  The key pattern may contain delimiters or punctuation provided they
    are suppressed, so that only the significant key text remains.  The value pattern
    may define results names, which then appear as named fields in the C{Dict} results.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped key/value pair per dictionary entry
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))
4639
def originalTextFor(expr, asString=True):
    """
    Helper that yields the original, untokenized input text matched by an expression.
    Handy for turning the parsed fields of an HTML start tag back into the raw tag text,
    or for recovering the exact input (including intervening whitespace) spanned by
    several tokens.  By default the result is a string containing the original text.

    When C{asString} is passed as C{False}, the result is instead a C{L{ParseResults}}
    that keeps any results names matched inside C{expr} and holds a single token with
    the original matched text.  So if the expression given to C{L{originalTextFor}}
    contains expressions with results names, pass C{asString=False} to preserve them.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # empty expressions whose parse action reports the current parse location
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")
    if not asString:
        def extractText(s,l,t):
            # remove the two marker entries, then replace all tokens with the source slice
            begin = t.pop('_original_start')
            finish = t.pop('_original_end')
            t[:] = [s[begin:finish]]
    else:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
4676
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.  Wraps C{expr} in a C{TokenConverter} whose
    parse action returns only the first matched token.
    """
    firstToken = lambda t: t[0]
    return TokenConverter(expr).setParseAction(firstToken)
4683
def locatedExpr(expr):
    """
    Helper that wraps a matched token together with its start and end locations
    in the input string.  The grouped result carries these results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # empty expression whose parse action returns the current location
    startLocator = Empty().setParseAction(lambda s,l,t: l)
    # the end marker is a separate copy configured with leaveWhitespace()
    endLocator = startLocator.copy().leaveWhitespace()
    return Group(startLocator("locn_start") + expr("value") + endLocator("locn_end"))
4706
4707
# Convenience constants for positional expressions: one shared, named
# instance of each positional class.
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
4714
# Grammar pieces used by srange() to parse a regex-style "[...]" character set.

# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## (or legacy \0x##) hex escape -> the character with that code point.
# The digits are everything after the x/X marker.  str.lstrip(r'\0x') must not be
# used here: it strips a *set* of characters, so it also ate leading zero digits
# (r"\x00" became '') and did not remove an upper-case "X".
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
# \0## octal escape -> the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# "a-z" style range, grouped so srange() can tell it apart from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4721
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.  If the input cannot be parsed or
    expanded, an empty string is returned.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expanded(p):
        # single characters arrive as plain strings; ranges arrive as grouped results
        if not isinstance(p, ParseResults):
            return p
        first, last = ord(p[0]), ord(p[1])
        return ''.join(unichr(c) for c in range(first, last + 1))

    try:
        parsed = _reBracketExpr.parseString(s)
        return "".join(_expanded(part) for part in parsed.body)
    except Exception:
        return ""
4745
def matchOnlyAtCol(n):
    """
    Helper method that builds a parse action which only accepts a match
    located at column C{n} of the input text; at any other column the
    returned action raises a C{ParseException}.
    """
    def verifyCol(strg,locn,toks):
        actual = col(locn,strg)
        if actual == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
4755
def replaceWith(replStr):
    """
    Helper method that builds a parse action which ignores the matched tokens and
    returns a one-element list holding the given literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    # a fresh list is built on every call of the action
    constantAction = lambda s,l,t: [replStr]
    return constantAction
4769
def removeQuotes(s,l,t):
    """
    Helper parse action that strips the quotation marks from a parsed quoted string,
    by dropping the first and last character of the first token.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4783
def tokenMap(func, *args):
    """
    Helper that builds a parse action applying a function to every element of a
    ParseResults list.  Any additional args are forwarded to the function after the
    token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which
    converts the parsed data to an integer using base 16.  The returned action is
    named after C{func} (its C{__name__}, else its class name, else C{str(func)}).

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    try:
        # objects without __name__ fall back to the name of their class
        fallback = getattr(func, '__class__').__name__
        func_name = getattr(func, '__name__', fallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
4827
# Parse action built with tokenMap: applies _ustr(tok).upper() to every token.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

# Parse action built with tokenMap: applies _ustr(tok).lower() to every token.
downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
4833
def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} may be a string (turned into a C{Keyword}, caseless unless C{xml} is true)
    or an existing expression, whose C{name} is then used as the tag name.  Returns the
    2-tuple C{(openTag, closeTag)}; both expressions get a C{tag} attribute holding
    the tag name.
    """
    if not isinstance(tagStr,basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(resname, caseless=not xml)

    attrName = Word(alphas,alphanums+"_-:")
    # optional "/" before ">", reported under "empty" as True/False
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    if xml:
        # XML: every attribute needs a double-quoted value
        attrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrPair = Group( attrName + Suppress("=") + attrValue )
    else:
        # HTML: attribute names are lower-cased, values are optional and may be unquoted
        unquotedChars = "".join(ch for ch in printables if ch != ">")
        attrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(unquotedChars)
        attrPair = Group( attrName.setParseAction(downcaseTokens) +
                          Optional( Suppress("=") + attrValue ) )
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attrPair)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # "ns:tag" -> "NsTag", used to build the startXxx / endXxx results names
    camelName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start" + camelName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + camelName).setName("</%s>" % resname)
    openTag.tag = closeTag.tag = resname
    return openTag, closeTag
4862
def makeHTMLTags(tagStr):
    """
    Helper that builds the opening and closing tag expressions for an HTML tag name.
    The expressions match tags in either upper or lower case, and attributes with
    namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags(tagStr, xml=False)
4881
def makeXMLTags(tagStr):
    """
    Helper that builds the opening and closing tag expressions for an XML tag name.
    The expressions match tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags(tagStr, xml=True)
4890
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If any positional name-value tuples are given, the keyword arguments are not used.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    pairs = args if args else attrDict.items()
    # snapshot the (name, value) pairs once, so the action can iterate them on every call
    required = [(name, value) for name, value in pairs]
    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
# sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
4955
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} for matching on a tag's C{class}
    attribute - awkward to spell directly because C{class} is a reserved word in Python.
    When C{namespace} is given, the attribute tested is C{"<namespace>:class"}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
4990
# Namespace holding the associativity markers accepted by infixNotation();
# LEFT and RIGHT are distinct sentinel objects that infixNotation() compares
# against the rightLeftAssoc member of each operator definition.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
4994
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the nested
      expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple or list of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted)
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is 3 and opExpr is not a pair of expressions,
    if numTerms is not 1, 2 or 3, or if rightLeftAssoc is neither
    C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the optional parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate before formatting the name, so a malformed opExpr is reported
            # as the intended ValueError rather than a string-formatting TypeError
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: adjacent terms form the operation
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, or falls through to the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
5101
# Backward-compatible alias: the same function object as infixNotation.
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
5104
# Quoted-string expressions.  Each Regex matches the opening quote followed by any
# run of: a character other than that quote, CR, LF or backslash; a doubled quote
# character; or a backslash escape (backslash plus one non-"x" character, or
# backslash plus "x" and hex digits).  The closing quote is matched as a separate
# literal, and the two pieces are wrapped in Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either quoting style; the double-quoted alternative is listed first
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string immediately preceded by a literal "u"
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
5110
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method that builds an expression for nested lists enclosed in opening
    and closing delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if C{opener} equals C{closer}, or if no C{content}
    expression is given and C{opener}/C{closer} are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        stripToken = lambda t:t[0].strip()
        whitespace = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1
        if singleCharDelims and ignoreExpr is not None:
            content = Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whitespace,exact=1))
                        ).setParseAction(stripToken)
        elif singleCharDelims:
            content = empty.copy() + CharsNotIn(opener+closer+whitespace).setParseAction(stripToken)
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookaheads instead of a character set
            content = Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whitespace,exact=1))
                        ).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whitespace,exact=1))
                        ).setParseAction(stripToken)
    ret = Forward()
    if ignoreExpr is None:
        alternatives = ret | content
    else:
        alternatives = ignoreExpr | ret | content
    # recursive definition: a group may contain further nested groups
    ret <<= Group( Suppress(opener) + ZeroOrMore(alternatives) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5200
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Build an expression matching a block of statements that are grouped by
    indentation column, in the manner of block statements in Python source.

    Parameters:
     - blockStatementExpr - expression for a single statement; it is matched
            one or more times inside the block.  As a side effect, the ignore
            expression C{_bslash + LineEnd()} (presumably a backslash line
            continuation) is added to this expression in place.
     - indentStack - list owned by the caller that holds the indentation
            columns currently in effect; columns are appended and popped by
            the parse actions defined here, so all C{indentedBlock}
            expressions within a single grammar should share one list
     - indent - boolean indicating whether the block must start at a column
            deeper than the current top of C{indentStack}; set to False for
            a block of left-most statements (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Returns a C{Group} expression named C{'indented block'}; each matched
    statement is itself wrapped in a C{Group}.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(instring, loc, tokens):
        # a statement must sit exactly at the column on top of the stack;
        # nothing is checked once the end of the input has been reached
        if loc < len(instring):
            column = col(loc, instring)
            expected = indentStack[-1]
            if column > expected:
                # deeper than the current block: abort the whole parse
                raise ParseFatalException(instring, loc, "illegal nesting")
            if column != expected:
                raise ParseException(instring, loc, "not a peer entry")

    def checkSubIndent(instring, loc, tokens):
        # entering a block: the first statement has to be indented further
        # than the enclosing level, and its column becomes the new level
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        indentStack.append(column)

    def checkUnindent(instring, loc, tokens):
        # leaving a block: end of input always counts as an unindent and
        # leaves the stack untouched
        if loc >= len(instring):
            return
        column = col(loc, instring)
        is_unindent = (indentStack
                       and column < indentStack[-1]
                       and column <= indentStack[-2])
        if not is_unindent:
            raise ParseException(instring, loc, "not an unindent")
        indentStack.pop()

    # line ends here skip only tabs and spaces as leading whitespace
    newlines = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    indent_marker = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    peer_marker = Empty().setParseAction(checkPeerIndent).setName('')
    unindent_marker = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more statements, each verified to sit at the current column
    statements = OneOrMore(peer_marker + Group(blockStatementExpr) + Optional(newlines))
    if indent:
        smExpr = Group(Optional(newlines) + indent_marker + statements + unindent_marker)
    else:
        smExpr = Group(Optional(newlines) + statements)

    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5314
# 8-bit character sets: alphabetic codes 0xc0-0xff (leaving out 0xd7 and 0xf7)
# and punctuation codes 0xa1-0xbf plus 0xd7 and 0xf7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for a tag name made of alphas, nums, '_' and ':'
anyOpenTag, anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums + "_:").setName('any tag'))

# entity name -> replacement character, used by replaceHTMLEntity
_htmlEntityMap = {
    "gt": '>',
    "lt": '<',
    "amp": '&',
    "nbsp": ' ',
    "quot": '"',
    "apos": "'",
}
commonHTMLEntity = Regex(
    '&(?P<entity>%s);' % '|'.join(_htmlEntityMap)).setName("common HTML entity")
def replaceHTMLEntity(t):
    """
    Helper parse action that swaps a matched HTML entity for the character
    it stands for.

    Reads the C{entity} named result from C{t} and looks it up in
    C{_htmlEntityMap}; returns C{None} if that name is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
5324
# Ready-made comment expressions - these structures are very common and easy
# to get subtly wrong, so they are provided here for reuse.
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# everything up to the end of the current line, leading whitespace included
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# a backslash directly before the newline continues the comment on the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/')
    | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one list item: printable words (no commas), optionally joined by runs of
# spaces/tabs that are not followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
5351
5352# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # numerator and denominator are copies of signed_integer whose parse action
    # is replaced so that they produce floats and the division is a true division
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float;
       a zero denominator is rejected with a ParseException"""

    def _divide_fraction(s, l, t):
        # tokens are [numerator, '/', denominator]; report a zero denominator
        # as a parse failure (as convertToDate/convertToDatetime do for bad
        # input) rather than letting ZeroDivisionError escape from the parser
        try:
            return t[0] / t[-1]
        except ZeroDivisionError as zde:
            raise ParseException(s, l, str(zde))
    fraction.addParseAction(_divide_fraction)
    # the helper is only needed as a parse action, keep it out of the class namespace
    del _divide_fraction

    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the integer part and the (already divided) fraction part are added together
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # the "::" form must stand in for at least one group, so fewer than 8 hex groups may be present
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the first delimiter is captured and back-referenced, so all six octets must use the same one
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        The returned parse action raises C{ParseException} if the first token
        does not match C{fmt}.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        The returned parse action raises C{ParseException} if the first token
        does not match C{fmt}.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                raise ParseException(s, l, str(ve))
        return cvt_fn

    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # matches any opening or closing tag and drops it from the output
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Only the first token is processed; it is returned with every opening
        and closing tag removed.

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                        + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
5625
5626
if __name__ == "__main__":
    # Self-demo: a tiny "SELECT ... FROM ..." grammar, followed by a few of
    # the pyparsing_common expressions, all exercised through runTests.

    select_kw = CaselessLiteral("select")
    from_kw = CaselessLiteral("from")

    name_part = Word(alphas, alphanums + "_$")

    # columns: either '*' or a comma-separated list of dotted, upper-cased names
    column_name = delimitedList(name_part, ".", combine=True)
    column_name.setParseAction(upcaseTokens)
    column_list = Group(delimitedList(column_name)).setName("columns")
    column_spec = '*' | column_list

    # tables: comma-separated list of dotted, upper-cased names
    table_name = delimitedList(name_part, ".", combine=True)
    table_name.setParseAction(upcaseTokens)
    table_list = Group(delimitedList(table_name)).setName("tables")

    simple_sql = (select_kw("command")
                  + column_spec("columns")
                  + from_kw
                  + table_list("tables"))

    # demo runTests method, including embedded comments in test string
    simple_sql.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # the same samples are run through 'number' (int or float results) and
    # then through 'fnumber' (any int or real number, returned as float)
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """
    for numeric_expr in (pyparsing_common.number, pyparsing_common.fnumber):
        numeric_expr.runTests(numeric_samples)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # convert matched UUID strings into uuid.UUID objects before testing
    import uuid
    uuid_expr = pyparsing_common.uuid
    uuid_expr.setParseAction(tokenMap(uuid.UUID))
    uuid_expr.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
5697