• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# Secret Labs' Regular Expression Engine
3#
4# convert re-style regular expression to sre pattern
5#
6# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
7#
8# See the sre.py file for information on usage and redistribution.
9#
10
11"""Internal support module for sre"""
12
13# XXX: show string offset and offending character for all errors
14
15import sys
16
17from sre_constants import *
18
19SPECIAL_CHARS = ".\\[{()*+?^$|"
20REPEAT_CHARS = "*+?{"
21
22DIGITS = set("0123456789")
23
24OCTDIGITS = set("01234567")
25HEXDIGITS = set("0123456789abcdefABCDEF")
26
27WHITESPACE = set(" \t\n\r\v\f")
28
29ESCAPES = {
30    r"\a": (LITERAL, ord("\a")),
31    r"\b": (LITERAL, ord("\b")),
32    r"\f": (LITERAL, ord("\f")),
33    r"\n": (LITERAL, ord("\n")),
34    r"\r": (LITERAL, ord("\r")),
35    r"\t": (LITERAL, ord("\t")),
36    r"\v": (LITERAL, ord("\v")),
37    r"\\": (LITERAL, ord("\\"))
38}
39
40CATEGORIES = {
41    r"\A": (AT, AT_BEGINNING_STRING), # start of string
42    r"\b": (AT, AT_BOUNDARY),
43    r"\B": (AT, AT_NON_BOUNDARY),
44    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
45    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
46    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
47    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
48    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
49    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
50    r"\Z": (AT, AT_END_STRING), # end of string
51}
52
53FLAGS = {
54    # standard flags
55    "i": SRE_FLAG_IGNORECASE,
56    "L": SRE_FLAG_LOCALE,
57    "m": SRE_FLAG_MULTILINE,
58    "s": SRE_FLAG_DOTALL,
59    "x": SRE_FLAG_VERBOSE,
60    # extensions
61    "t": SRE_FLAG_TEMPLATE,
62    "u": SRE_FLAG_UNICODE,
63}
64
65class Pattern:
66    # master pattern object.  keeps track of global attributes
67    def __init__(self):
68        self.flags = 0
69        self.open = []
70        self.groups = 1
71        self.groupdict = {}
72        self.lookbehind = 0
73
74    def opengroup(self, name=None):
75        gid = self.groups
76        self.groups = gid + 1
77        if name is not None:
78            ogid = self.groupdict.get(name, None)
79            if ogid is not None:
80                raise error, ("redefinition of group name %s as group %d; "
81                              "was group %d" % (repr(name), gid,  ogid))
82            self.groupdict[name] = gid
83        self.open.append(gid)
84        return gid
85    def closegroup(self, gid):
86        self.open.remove(gid)
87    def checkgroup(self, gid):
88        return gid < self.groups and gid not in self.open
89
90class SubPattern:
91    # a subpattern, in intermediate form
92    def __init__(self, pattern, data=None):
93        self.pattern = pattern
94        if data is None:
95            data = []
96        self.data = data
97        self.width = None
98    def dump(self, level=0):
99        seqtypes = (tuple, list)
100        for op, av in self.data:
101            print level*"  " + op,
102            if op == IN:
103                # member sublanguage
104                print
105                for op, a in av:
106                    print (level+1)*"  " + op, a
107            elif op == BRANCH:
108                print
109                for i, a in enumerate(av[1]):
110                    if i:
111                        print level*"  " + "or"
112                    a.dump(level+1)
113            elif op == GROUPREF_EXISTS:
114                condgroup, item_yes, item_no = av
115                print condgroup
116                item_yes.dump(level+1)
117                if item_no:
118                    print level*"  " + "else"
119                    item_no.dump(level+1)
120            elif isinstance(av, seqtypes):
121                nl = 0
122                for a in av:
123                    if isinstance(a, SubPattern):
124                        if not nl:
125                            print
126                        a.dump(level+1)
127                        nl = 1
128                    else:
129                        print a,
130                        nl = 0
131                if not nl:
132                    print
133            else:
134                print av
135    def __repr__(self):
136        return repr(self.data)
137    def __len__(self):
138        return len(self.data)
139    def __delitem__(self, index):
140        del self.data[index]
141    def __getitem__(self, index):
142        if isinstance(index, slice):
143            return SubPattern(self.pattern, self.data[index])
144        return self.data[index]
145    def __setitem__(self, index, code):
146        self.data[index] = code
147    def insert(self, index, code):
148        self.data.insert(index, code)
149    def append(self, code):
150        self.data.append(code)
151    def getwidth(self):
152        # determine the width (min, max) for this subpattern
153        if self.width:
154            return self.width
155        lo = hi = 0
156        UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
157        REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
158        for op, av in self.data:
159            if op is BRANCH:
160                i = MAXREPEAT - 1
161                j = 0
162                for av in av[1]:
163                    l, h = av.getwidth()
164                    i = min(i, l)
165                    j = max(j, h)
166                lo = lo + i
167                hi = hi + j
168            elif op is CALL:
169                i, j = av.getwidth()
170                lo = lo + i
171                hi = hi + j
172            elif op is SUBPATTERN:
173                i, j = av[1].getwidth()
174                lo = lo + i
175                hi = hi + j
176            elif op in REPEATCODES:
177                i, j = av[2].getwidth()
178                lo = lo + i * av[0]
179                hi = hi + j * av[1]
180            elif op in UNITCODES:
181                lo = lo + 1
182                hi = hi + 1
183            elif op == SUCCESS:
184                break
185        self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)
186        return self.width
187
188class Tokenizer:
189    def __init__(self, string):
190        self.string = string
191        self.index = 0
192        self.__next()
193    def __next(self):
194        if self.index >= len(self.string):
195            self.next = None
196            return
197        char = self.string[self.index]
198        if char[0] == "\\":
199            try:
200                c = self.string[self.index + 1]
201            except IndexError:
202                raise error, "bogus escape (end of line)"
203            char = char + c
204        self.index = self.index + len(char)
205        self.next = char
206    def match(self, char, skip=1):
207        if char == self.next:
208            if skip:
209                self.__next()
210            return 1
211        return 0
212    def get(self):
213        this = self.next
214        self.__next()
215        return this
216    def tell(self):
217        return self.index, self.next
218    def seek(self, index):
219        self.index, self.next = index
220
221def isident(char):
222    return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
223
224def isdigit(char):
225    return "0" <= char <= "9"
226
227def isname(name):
228    # check that group name is a valid string
229    if not isident(name[0]):
230        return False
231    for char in name[1:]:
232        if not isident(char) and not isdigit(char):
233            return False
234    return True
235
236def _class_escape(source, escape):
237    # handle escape code inside character class
238    code = ESCAPES.get(escape)
239    if code:
240        return code
241    code = CATEGORIES.get(escape)
242    if code and code[0] == IN:
243        return code
244    try:
245        c = escape[1:2]
246        if c == "x":
247            # hexadecimal escape (exactly two digits)
248            while source.next in HEXDIGITS and len(escape) < 4:
249                escape = escape + source.get()
250            escape = escape[2:]
251            if len(escape) != 2:
252                raise error, "bogus escape: %s" % repr("\\" + escape)
253            return LITERAL, int(escape, 16) & 0xff
254        elif c in OCTDIGITS:
255            # octal escape (up to three digits)
256            while source.next in OCTDIGITS and len(escape) < 4:
257                escape = escape + source.get()
258            escape = escape[1:]
259            return LITERAL, int(escape, 8) & 0xff
260        elif c in DIGITS:
261            raise error, "bogus escape: %s" % repr(escape)
262        if len(escape) == 2:
263            return LITERAL, ord(escape[1])
264    except ValueError:
265        pass
266    raise error, "bogus escape: %s" % repr(escape)
267
268def _escape(source, escape, state):
269    # handle escape code in expression
270    code = CATEGORIES.get(escape)
271    if code:
272        return code
273    code = ESCAPES.get(escape)
274    if code:
275        return code
276    try:
277        c = escape[1:2]
278        if c == "x":
279            # hexadecimal escape
280            while source.next in HEXDIGITS and len(escape) < 4:
281                escape = escape + source.get()
282            if len(escape) != 4:
283                raise ValueError
284            return LITERAL, int(escape[2:], 16) & 0xff
285        elif c == "0":
286            # octal escape
287            while source.next in OCTDIGITS and len(escape) < 4:
288                escape = escape + source.get()
289            return LITERAL, int(escape[1:], 8) & 0xff
290        elif c in DIGITS:
291            # octal escape *or* decimal group reference (sigh)
292            if source.next in DIGITS:
293                escape = escape + source.get()
294                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
295                    source.next in OCTDIGITS):
296                    # got three octal digits; this is an octal escape
297                    escape = escape + source.get()
298                    return LITERAL, int(escape[1:], 8) & 0xff
299            # not an octal escape, so this is a group reference
300            group = int(escape[1:])
301            if group < state.groups:
302                if not state.checkgroup(group):
303                    raise error, "cannot refer to open group"
304                if state.lookbehind:
305                    import warnings
306                    warnings.warn('group references in lookbehind '
307                                  'assertions are not supported',
308                                  RuntimeWarning)
309                return GROUPREF, group
310            raise ValueError
311        if len(escape) == 2:
312            return LITERAL, ord(escape[1])
313    except ValueError:
314        pass
315    raise error, "bogus escape: %s" % repr(escape)
316
317def _parse_sub(source, state, nested=1):
318    # parse an alternation: a|b|c
319
320    items = []
321    itemsappend = items.append
322    sourcematch = source.match
323    while 1:
324        itemsappend(_parse(source, state))
325        if sourcematch("|"):
326            continue
327        if not nested:
328            break
329        if not source.next or sourcematch(")", 0):
330            break
331        else:
332            raise error, "pattern not properly closed"
333
334    if len(items) == 1:
335        return items[0]
336
337    subpattern = SubPattern(state)
338    subpatternappend = subpattern.append
339
340    # check if all items share a common prefix
341    while 1:
342        prefix = None
343        for item in items:
344            if not item:
345                break
346            if prefix is None:
347                prefix = item[0]
348            elif item[0] != prefix:
349                break
350        else:
351            # all subitems start with a common "prefix".
352            # move it out of the branch
353            for item in items:
354                del item[0]
355            subpatternappend(prefix)
356            continue # check next one
357        break
358
359    # check if the branch can be replaced by a character set
360    for item in items:
361        if len(item) != 1 or item[0][0] != LITERAL:
362            break
363    else:
364        # we can store this as a character set instead of a
365        # branch (the compiler may optimize this even more)
366        set = []
367        setappend = set.append
368        for item in items:
369            setappend(item[0])
370        subpatternappend((IN, set))
371        return subpattern
372
373    subpattern.append((BRANCH, (None, items)))
374    return subpattern
375
376def _parse_sub_cond(source, state, condgroup):
377    item_yes = _parse(source, state)
378    if source.match("|"):
379        item_no = _parse(source, state)
380        if source.match("|"):
381            raise error, "conditional backref with more than two branches"
382    else:
383        item_no = None
384    if source.next and not source.match(")", 0):
385        raise error, "pattern not properly closed"
386    subpattern = SubPattern(state)
387    subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
388    return subpattern
389
390_PATTERNENDERS = set("|)")
391_ASSERTCHARS = set("=!<")
392_LOOKBEHINDASSERTCHARS = set("=!")
393_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
394
395def _parse(source, state):
396    # parse a simple pattern
397    subpattern = SubPattern(state)
398
399    # precompute constants into local variables
400    subpatternappend = subpattern.append
401    sourceget = source.get
402    sourcematch = source.match
403    _len = len
404    PATTERNENDERS = _PATTERNENDERS
405    ASSERTCHARS = _ASSERTCHARS
406    LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
407    REPEATCODES = _REPEATCODES
408
409    while 1:
410
411        if source.next in PATTERNENDERS:
412            break # end of subpattern
413        this = sourceget()
414        if this is None:
415            break # end of pattern
416
417        if state.flags & SRE_FLAG_VERBOSE:
418            # skip whitespace and comments
419            if this in WHITESPACE:
420                continue
421            if this == "#":
422                while 1:
423                    this = sourceget()
424                    if this in (None, "\n"):
425                        break
426                continue
427
428        if this and this[0] not in SPECIAL_CHARS:
429            subpatternappend((LITERAL, ord(this)))
430
431        elif this == "[":
432            # character set
433            set = []
434            setappend = set.append
435##          if sourcematch(":"):
436##              pass # handle character classes
437            if sourcematch("^"):
438                setappend((NEGATE, None))
439            # check remaining characters
440            start = set[:]
441            while 1:
442                this = sourceget()
443                if this == "]" and set != start:
444                    break
445                elif this and this[0] == "\\":
446                    code1 = _class_escape(source, this)
447                elif this:
448                    code1 = LITERAL, ord(this)
449                else:
450                    raise error, "unexpected end of regular expression"
451                if sourcematch("-"):
452                    # potential range
453                    this = sourceget()
454                    if this == "]":
455                        if code1[0] is IN:
456                            code1 = code1[1][0]
457                        setappend(code1)
458                        setappend((LITERAL, ord("-")))
459                        break
460                    elif this:
461                        if this[0] == "\\":
462                            code2 = _class_escape(source, this)
463                        else:
464                            code2 = LITERAL, ord(this)
465                        if code1[0] != LITERAL or code2[0] != LITERAL:
466                            raise error, "bad character range"
467                        lo = code1[1]
468                        hi = code2[1]
469                        if hi < lo:
470                            raise error, "bad character range"
471                        setappend((RANGE, (lo, hi)))
472                    else:
473                        raise error, "unexpected end of regular expression"
474                else:
475                    if code1[0] is IN:
476                        code1 = code1[1][0]
477                    setappend(code1)
478
479            # XXX: <fl> should move set optimization to compiler!
480            if _len(set)==1 and set[0][0] is LITERAL:
481                subpatternappend(set[0]) # optimization
482            elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
483                subpatternappend((NOT_LITERAL, set[1][1])) # optimization
484            else:
485                # XXX: <fl> should add charmap optimization here
486                subpatternappend((IN, set))
487
488        elif this and this[0] in REPEAT_CHARS:
489            # repeat previous item
490            if this == "?":
491                min, max = 0, 1
492            elif this == "*":
493                min, max = 0, MAXREPEAT
494
495            elif this == "+":
496                min, max = 1, MAXREPEAT
497            elif this == "{":
498                if source.next == "}":
499                    subpatternappend((LITERAL, ord(this)))
500                    continue
501                here = source.tell()
502                min, max = 0, MAXREPEAT
503                lo = hi = ""
504                while source.next in DIGITS:
505                    lo = lo + source.get()
506                if sourcematch(","):
507                    while source.next in DIGITS:
508                        hi = hi + sourceget()
509                else:
510                    hi = lo
511                if not sourcematch("}"):
512                    subpatternappend((LITERAL, ord(this)))
513                    source.seek(here)
514                    continue
515                if lo:
516                    min = int(lo)
517                    if min >= MAXREPEAT:
518                        raise OverflowError("the repetition number is too large")
519                if hi:
520                    max = int(hi)
521                    if max >= MAXREPEAT:
522                        raise OverflowError("the repetition number is too large")
523                    if max < min:
524                        raise error("bad repeat interval")
525            else:
526                raise error, "not supported"
527            # figure out which item to repeat
528            if subpattern:
529                item = subpattern[-1:]
530            else:
531                item = None
532            if not item or (_len(item) == 1 and item[0][0] == AT):
533                raise error, "nothing to repeat"
534            if item[0][0] in REPEATCODES:
535                raise error, "multiple repeat"
536            if sourcematch("?"):
537                subpattern[-1] = (MIN_REPEAT, (min, max, item))
538            else:
539                subpattern[-1] = (MAX_REPEAT, (min, max, item))
540
541        elif this == ".":
542            subpatternappend((ANY, None))
543
544        elif this == "(":
545            group = 1
546            name = None
547            condgroup = None
548            if sourcematch("?"):
549                group = 0
550                # options
551                if sourcematch("P"):
552                    # python extensions
553                    if sourcematch("<"):
554                        # named group: skip forward to end of name
555                        name = ""
556                        while 1:
557                            char = sourceget()
558                            if char is None:
559                                raise error, "unterminated name"
560                            if char == ">":
561                                break
562                            name = name + char
563                        group = 1
564                        if not name:
565                            raise error("missing group name")
566                        if not isname(name):
567                            raise error("bad character in group name %r" %
568                                        name)
569                    elif sourcematch("="):
570                        # named backreference
571                        name = ""
572                        while 1:
573                            char = sourceget()
574                            if char is None:
575                                raise error, "unterminated name"
576                            if char == ")":
577                                break
578                            name = name + char
579                        if not name:
580                            raise error("missing group name")
581                        if not isname(name):
582                            raise error("bad character in backref group name "
583                                        "%r" % name)
584                        gid = state.groupdict.get(name)
585                        if gid is None:
586                            msg = "unknown group name: {0!r}".format(name)
587                            raise error(msg)
588                        if state.lookbehind:
589                            import warnings
590                            warnings.warn('group references in lookbehind '
591                                          'assertions are not supported',
592                                          RuntimeWarning)
593                        subpatternappend((GROUPREF, gid))
594                        continue
595                    else:
596                        char = sourceget()
597                        if char is None:
598                            raise error, "unexpected end of pattern"
599                        raise error, "unknown specifier: ?P%s" % char
600                elif sourcematch(":"):
601                    # non-capturing group
602                    group = 2
603                elif sourcematch("#"):
604                    # comment
605                    while 1:
606                        if source.next is None or source.next == ")":
607                            break
608                        sourceget()
609                    if not sourcematch(")"):
610                        raise error, "unbalanced parenthesis"
611                    continue
612                elif source.next in ASSERTCHARS:
613                    # lookahead assertions
614                    char = sourceget()
615                    dir = 1
616                    if char == "<":
617                        if source.next not in LOOKBEHINDASSERTCHARS:
618                            raise error, "syntax error"
619                        dir = -1 # lookbehind
620                        char = sourceget()
621                        state.lookbehind += 1
622                    p = _parse_sub(source, state)
623                    if dir < 0:
624                        state.lookbehind -= 1
625                    if not sourcematch(")"):
626                        raise error, "unbalanced parenthesis"
627                    if char == "=":
628                        subpatternappend((ASSERT, (dir, p)))
629                    else:
630                        subpatternappend((ASSERT_NOT, (dir, p)))
631                    continue
632                elif sourcematch("("):
633                    # conditional backreference group
634                    condname = ""
635                    while 1:
636                        char = sourceget()
637                        if char is None:
638                            raise error, "unterminated name"
639                        if char == ")":
640                            break
641                        condname = condname + char
642                    group = 2
643                    if not condname:
644                        raise error("missing group name")
645                    if isname(condname):
646                        condgroup = state.groupdict.get(condname)
647                        if condgroup is None:
648                            msg = "unknown group name: {0!r}".format(condname)
649                            raise error(msg)
650                    else:
651                        try:
652                            condgroup = int(condname)
653                        except ValueError:
654                            raise error, "bad character in group name"
655                    if state.lookbehind:
656                        import warnings
657                        warnings.warn('group references in lookbehind '
658                                      'assertions are not supported',
659                                      RuntimeWarning)
660                else:
661                    # flags
662                    if not source.next in FLAGS:
663                        raise error, "unexpected end of pattern"
664                    while source.next in FLAGS:
665                        state.flags = state.flags | FLAGS[sourceget()]
666            if group:
667                # parse group contents
668                if group == 2:
669                    # anonymous group
670                    group = None
671                else:
672                    group = state.opengroup(name)
673                if condgroup:
674                    p = _parse_sub_cond(source, state, condgroup)
675                else:
676                    p = _parse_sub(source, state)
677                if not sourcematch(")"):
678                    raise error, "unbalanced parenthesis"
679                if group is not None:
680                    state.closegroup(group)
681                subpatternappend((SUBPATTERN, (group, p)))
682            else:
683                while 1:
684                    char = sourceget()
685                    if char is None:
686                        raise error, "unexpected end of pattern"
687                    if char == ")":
688                        break
689                    raise error, "unknown extension"
690
691        elif this == "^":
692            subpatternappend((AT, AT_BEGINNING))
693
694        elif this == "$":
695            subpattern.append((AT, AT_END))
696
697        elif this and this[0] == "\\":
698            code = _escape(source, this, state)
699            subpatternappend(code)
700
701        else:
702            raise error, "parser error"
703
704    return subpattern
705
706def parse(str, flags=0, pattern=None):
707    # parse 're' pattern into list of (opcode, argument) tuples
708
709    source = Tokenizer(str)
710
711    if pattern is None:
712        pattern = Pattern()
713    pattern.flags = flags
714    pattern.str = str
715
716    p = _parse_sub(source, pattern, 0)
717
718    tail = source.get()
719    if tail == ")":
720        raise error, "unbalanced parenthesis"
721    elif tail:
722        raise error, "bogus characters at end of regular expression"
723
724    if flags & SRE_FLAG_DEBUG:
725        p.dump()
726
727    if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
728        # the VERBOSE flag was switched on inside the pattern.  to be
729        # on the safe side, we'll parse the whole thing again...
730        return parse(str, p.pattern.flags)
731
732    return p
733
734def parse_template(source, pattern):
735    # parse 're' replacement string into list of literals and
736    # group references
737    s = Tokenizer(source)
738    sget = s.get
739    p = []
740    a = p.append
741    def literal(literal, p=p, pappend=a):
742        if p and p[-1][0] is LITERAL:
743            p[-1] = LITERAL, p[-1][1] + literal
744        else:
745            pappend((LITERAL, literal))
746    sep = source[:0]
747    if type(sep) is type(""):
748        makechar = chr
749    else:
750        makechar = unichr
751    while 1:
752        this = sget()
753        if this is None:
754            break # end of replacement string
755        if this and this[0] == "\\":
756            # group
757            c = this[1:2]
758            if c == "g":
759                name = ""
760                if s.match("<"):
761                    while 1:
762                        char = sget()
763                        if char is None:
764                            raise error, "unterminated group name"
765                        if char == ">":
766                            break
767                        name = name + char
768                if not name:
769                    raise error, "missing group name"
770                try:
771                    index = int(name)
772                    if index < 0:
773                        raise error, "negative group number"
774                except ValueError:
775                    if not isname(name):
776                        raise error, "bad character in group name"
777                    try:
778                        index = pattern.groupindex[name]
779                    except KeyError:
780                        msg = "unknown group name: {0!r}".format(name)
781                        raise IndexError(msg)
782                a((MARK, index))
783            elif c == "0":
784                if s.next in OCTDIGITS:
785                    this = this + sget()
786                    if s.next in OCTDIGITS:
787                        this = this + sget()
788                literal(makechar(int(this[1:], 8) & 0xff))
789            elif c in DIGITS:
790                isoctal = False
791                if s.next in DIGITS:
792                    this = this + sget()
793                    if (c in OCTDIGITS and this[2] in OCTDIGITS and
794                        s.next in OCTDIGITS):
795                        this = this + sget()
796                        isoctal = True
797                        literal(makechar(int(this[1:], 8) & 0xff))
798                if not isoctal:
799                    a((MARK, int(this[1:])))
800            else:
801                try:
802                    this = makechar(ESCAPES[this][1])
803                except KeyError:
804                    pass
805                literal(this)
806        else:
807            literal(this)
808    # convert template to groups and literals lists
809    i = 0
810    groups = []
811    groupsappend = groups.append
812    literals = [None] * len(p)
813    for c, s in p:
814        if c is MARK:
815            groupsappend((i, s))
816            # literal[i] is already None
817        else:
818            literals[i] = s
819        i = i + 1
820    return groups, literals
821
822def expand_template(template, match):
823    g = match.group
824    sep = match.string[:0]
825    groups, literals = template
826    literals = literals[:]
827    try:
828        for index, group in groups:
829            literals[index] = s = g(group)
830            if s is None:
831                raise error, "unmatched group"
832    except IndexError:
833        raise error, "invalid group reference"
834    return sep.join(literals)
835