• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""A collection of string constants.
2
3Public module variables:
4
5whitespace -- a string containing all ASCII whitespace
6ascii_lowercase -- a string containing all ASCII lowercase letters
7ascii_uppercase -- a string containing all ASCII uppercase letters
8ascii_letters -- a string containing all ASCII letters
9digits -- a string containing all ASCII decimal digits
10hexdigits -- a string containing all ASCII hexadecimal digits
11octdigits -- a string containing all ASCII octal digits
12punctuation -- a string containing all ASCII punctuation characters
13printable -- a string containing all ASCII characters considered printable
14
15"""
16
# Names exported by ``from string import *``.
__all__ = [
    "ascii_letters",
    "ascii_lowercase",
    "ascii_uppercase",
    "capwords",
    "digits",
    "hexdigits",
    "octdigits",
    "printable",
    "punctuation",
    "whitespace",
    "Formatter",
    "Template",
]
20
21import _string
22
# Character-class constants in the spirit of C's <ctype.h>.  The primitive
# sets are spelled out literally; the composite sets are assembled from them.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ''.join((ascii_lowercase, ascii_uppercase))
digits = '0123456789'
hexdigits = ''.join((digits, 'abcdef', 'ABCDEF'))
octdigits = '01234567'
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = ''.join((digits, ascii_letters, punctuation, whitespace))
33
34# Functions which aren't available as string methods.
35
36# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Break *s* into words with ``s.split(sep)``, capitalize every word
    with its ``capitalize()`` method and glue the words back together.

    When *sep* is omitted or None, any run of whitespace counts as one
    separator, leading and trailing whitespace is dropped and the words
    are re-joined with a single space.  Otherwise *sep* is used both to
    split and to join, so empty words between adjacent separators are
    preserved.

    """
    # Resolve the joiner before splitting so that an unusable *sep* fails
    # on the join lookup first.
    join_words = (sep or ' ').join
    words = s.split(sep)
    return join_words(word.capitalize() for word in words)
49
50
51####################################################################
52import re as _re
53from collections import ChainMap as _ChainMap
54
# Default for the positional ``mapping`` argument of Template.substitute()
# and Template.safe_substitute().  It is compared by identity, so "no mapping
# was passed" can be told apart from any dict supplied by the caller.
_sentinel_dict = dict()
56
class _TemplateMetaclass(type):
    """Metaclass that gives every template class a compiled ``pattern``.

    When a class is created, its ``pattern`` attribute is replaced by a
    compiled regular expression.  If the class body defines ``pattern``
    itself, that value is compiled as-is; otherwise the default pattern
    below is filled in from the class's ``delimiter``, ``idpattern`` and
    ``braceidpattern`` attributes.  In both cases the class's ``flags``
    are combined with re.VERBOSE.
    """

    # Default pattern source; the %(...)s slots are filled in by __init__.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(bid)s)}  |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)
        if 'pattern' not in dct:
            # No explicit pattern in the class body: build the default one
            # from the (possibly inherited) class attributes.
            substitutions = {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
                # A falsy braceidpattern means "same as idpattern".
                'bid': cls.braceidpattern or cls.idpattern,
            }
            source = _TemplateMetaclass.pattern % substitutions
        else:
            source = cls.pattern
        cls.pattern = _re.compile(source, cls.flags | _re.VERBOSE)
78
79
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    Class attributes that control parsing:

    delimiter      -- literal text that introduces a placeholder
    idpattern      -- regex source matching an unbraced placeholder name
    braceidpattern -- regex source matching a braced placeholder name
    flags          -- re flags used for the placeholder pattern
    """

    delimiter = '$'
    # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but
    # without the ASCII flag.  We can't add re.ASCII to flags because of
    # backward compatibility.  So we use the ?a local flag and [a-z] pattern.
    # See https://bugs.python.org/issue31672
    idpattern = r'(?a:[_a-z][_a-z0-9]*)'
    braceidpattern = None
    flags = _re.IGNORECASE

    def __init__(self, template):
        """Store *template*, the text in which placeholders are replaced."""
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder.

        *mo* is the match object whose 'invalid' group matched.
        """
        offset = mo.start('invalid')
        preceding = self.template[:offset].splitlines(keepends=True)
        lineno = colno = 1
        if preceding:
            # Column = offset minus the length of all complete earlier lines.
            earlier = sum(len(line) for line in preceding[:-1])
            colno = offset - earlier
            lineno = len(preceding)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, mapping=_sentinel_dict, /, **kws):
        """Return the template text with every placeholder replaced.

        Values are looked up in *mapping* and/or the keyword arguments;
        when both are given, keyword arguments are consulted first.  A
        lookup failure propagates from the mapping (KeyError for a dict),
        and an ill-formed placeholder raises ValueError.
        """
        if mapping is _sentinel_dict:
            values = kws
        elif kws:
            values = _ChainMap(kws, mapping)
        else:
            values = mapping

        # Replacement callback for pattern.sub().
        def replace(match):
            # Named/braced placeholders are the most common case.
            key = match.group('named') or match.group('braced')
            if key is not None:
                return str(values[key])
            if match.group('escaped') is not None:
                return self.delimiter
            if match.group('invalid') is not None:
                self._invalid(match)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(replace, self.template)

    def safe_substitute(self, mapping=_sentinel_dict, /, **kws):
        """Like substitute(), but leave unresolvable placeholders in place.

        A placeholder whose lookup raises KeyError, and any ill-formed
        placeholder, is copied to the result unchanged.
        """
        if mapping is _sentinel_dict:
            values = kws
        elif kws:
            values = _ChainMap(kws, mapping)
        else:
            values = mapping

        # Replacement callback for pattern.sub().
        def replace(match):
            key = match.group('named') or match.group('braced')
            if key is not None:
                try:
                    return str(values[key])
                except KeyError:
                    # Unknown name: keep the original placeholder text.
                    return match.group()
            if match.group('escaped') is not None:
                return self.delimiter
            if match.group('invalid') is not None:
                return match.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(replace, self.template)
148
149
150
151########################################################################
152# the Formatter class
153# see PEP 3101 for details and purpose of this class
154
# The hard parts are reused from the C implementation.  They're exposed as
# functions of the private _string module.
157
158# The overall parser is implemented in _string.formatter_parser.
159# The field name parser is implemented in _string.formatter_field_name_split
160
class Formatter:
    """Python-level implementation of str.format()-style formatting.

    format() and vformat() are the entry points.  Each processing step
    (parse, get_field, get_value, convert_field, format_field,
    check_unused_args) is a separate method, so a subclass can replace
    any single step.
    """

    def format(self, format_string, /, *args, **kwargs):
        """Format *format_string* with the given positional and keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format *format_string* using an args sequence and a kwargs mapping.

        Calls check_unused_args() with the set of argument keys that were
        referenced, then returns the formatted string.
        """
        used_args = set()
        # 2 is the nesting budget for replacement fields inside format specs.
        result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
                 auto_arg_index=0):
        """Format one level of *format_string*.

        Returns a ``(text, auto_arg_index)`` tuple so that automatic field
        numbering carries on across nested format specs.  *auto_arg_index*
        is the next automatic index, or False once a manually numbered
        field has been seen.

        Raises ValueError when *recursion_depth* drops below zero or when
        automatic and manual field numbering are mixed.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # Classify the field by its first component, so that
                # '{.attr}' / '{[0]}' count as automatically numbered and
                # '{0.attr}' / '{0[1]}' count as manually numbered.
                field_first, _rest = _string.formatter_field_name_split(
                    field_name)
                if field_first == '':
                    if auto_arg_index is False:
                        raise ValueError('cannot switch from manual field '
                                         'specification to automatic field '
                                         'numbering')
                    # Keep any trailing '.attr' / '[key]' accessors.
                    field_name = str(auto_arg_index) + field_name
                    auto_arg_index += 1
                elif isinstance(field_first, int):
                    if auto_arg_index:
                        raise ValueError('cannot switch from automatic field '
                                         'numbering to manual field '
                                         'specification')
                    # disable auto arg incrementing, if it gets
                    # used later on, then an exception will be raised
                    auto_arg_index = False

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec, auto_arg_index = self._vformat(
                    format_spec, args, kwargs,
                    used_args, recursion_depth-1,
                    auto_arg_index=auto_arg_index)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result), auto_arg_index

    def get_value(self, key, args, kwargs):
        """Return ``args[key]`` for an int *key*, otherwise ``kwargs[key]``."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Return *value* formatted with the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply the conversion flag ('s', 'r', 'a' or None) to *value*.

        Raises ValueError for any other *conversion*.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    def parse(self, format_string):
        """Split *format_string* into its literal and field parts.

        Returns an iterable of tuples of the form
        (literal_text, field_name, format_spec, conversion).
        literal_text can be zero length.  field_name can be None, in which
        case there is no object to format and output; otherwise it is
        looked up, converted with conversion and formatted with
        format_spec.
        """
        return _string.formatter_parser(format_string)

    def get_field(self, field_name, args, kwargs):
        """Resolve *field_name* (e.g. "0.name" or "lookup[3]") to an object.

        args and kwargs are as passed in to vformat.  Returns a tuple
        ``(obj, first)`` where *first* is the leading component of the
        field name, i.e. the key that was handed to get_value().
        """
        first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first
283