• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""A collection of string constants.
2
3Public module variables:
4
5whitespace -- a string containing all ASCII whitespace
6ascii_lowercase -- a string containing all ASCII lowercase letters
7ascii_uppercase -- a string containing all ASCII uppercase letters
8ascii_letters -- a string containing all ASCII letters
9digits -- a string containing all ASCII decimal digits
10hexdigits -- a string containing all ASCII hexadecimal digits
11octdigits -- a string containing all ASCII octal digits
12punctuation -- a string containing all ASCII punctuation characters
13printable -- a string containing all ASCII characters considered printable
14
15"""
16
# Public API of this module: the names bound by a star-import.
__all__ = [
    "ascii_letters",
    "ascii_lowercase",
    "ascii_uppercase",
    "capwords",
    "digits",
    "hexdigits",
    "octdigits",
    "printable",
    "punctuation",
    "whitespace",
    "Formatter",
    "Template",
]
20
21import _string
22
# Character-class constants in the spirit of C's ctype classification.
# Each one is a plain str; the composite ones are built from the simpler ones.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = ascii_lowercase.upper()
ascii_letters = ''.join((ascii_lowercase, ascii_uppercase))
digits = '0123456789'
# Decimal digits followed by the lower- and upper-case hex letters.
hexdigits = digits + 'abcdefABCDEF'
# The octal digits are the first eight decimal digits.
octdigits = digits[:8]
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = ''.join((digits, ascii_letters, punctuation, whitespace))
33
34# Functions which aren't available as string methods.
35
# Word-wise capitalization, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Break s into words with s.split(sep), apply capitalize() to every
    word, then glue the words back together with join.

    When sep is omitted or None the split is on runs of whitespace, so
    leading/trailing whitespace disappears and the words are rejoined
    with a single space.  Otherwise sep is both the split separator and
    the string placed between the rejoined words.

    """
    # The split happens first so that errors from split() surface before
    # anything else is attempted.
    words = s.split(sep)
    # A falsy sep (None in practice) means "rejoin with one space".
    joiner = sep if sep else ' '
    capitalized = [word.capitalize() for word in words]
    return joiner.join(capitalized)
49
50
51####################################################################
52import re as _re
53from collections import ChainMap as _ChainMap
54
# Default for the positional "mapping" parameter of substitute() and
# safe_substitute(); compared by identity to detect "no mapping passed".
_sentinel_dict = {}

class Template:
    """A string class for supporting $-substitutions.

    The template text is kept in the instance attribute ``template``.
    Placeholders are located with the compiled regular expression stored in
    the class attribute ``pattern``, which __init_subclass__() derives from
    these class attributes:

    delimiter      -- literal text that introduces a placeholder
    idpattern      -- regex source for an unbraced identifier
    braceidpattern -- regex source for a braced identifier; None means
                      "same as idpattern"
    flags          -- re flags used when compiling (re.VERBOSE is always
                      added)
    pattern        -- optional complete override, given either as regex
                      source or as an already compiled regex; the methods
                      below read the groups named "escaped", "named",
                      "braced" and "invalid"
    """

    delimiter = '$'
    # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but
    # without the ASCII flag.  We can't add re.ASCII to flags because of
    # backward compatibility.  So we use the ?a local flag and [a-z] pattern.
    # See https://bugs.python.org/issue31672
    idpattern = r'(?a:[_a-z][_a-z0-9]*)'
    braceidpattern = None
    flags = _re.IGNORECASE

    def __init_subclass__(cls):
        """Compile the placeholder regex for *cls* and store it as cls.pattern.

        If the class body defines ``pattern`` itself, that value is used:
        a compiled regex is kept exactly as supplied (``flags`` is not
        applied to it), while regex source is compiled with
        ``cls.flags | re.VERBOSE``.  Otherwise the pattern is assembled from
        ``delimiter``, ``idpattern`` and ``braceidpattern``.
        """
        super().__init_subclass__()
        if 'pattern' in cls.__dict__:
            pattern = cls.pattern
            if isinstance(pattern, _re.Pattern):
                # re.compile() raises ValueError when asked to apply flags
                # to an already compiled pattern, so use it unchanged.
                return
        else:
            delim = _re.escape(cls.delimiter)
            id = cls.idpattern
            bid = cls.braceidpattern or cls.idpattern
            pattern = fr"""
            {delim}(?:
              (?P<escaped>{delim})  |   # Escape sequence of two delimiters
              (?P<named>{id})       |   # delimiter and a Python identifier
              {{(?P<braced>{bid})}} |   # delimiter and a braced identifier
              (?P<invalid>)             # Other ill-formed delimiter exprs
            )
            """
        cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)

    def __init__(self, template):
        """Remember *template*, the text that contains the placeholders."""
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError reporting where an ill-formed placeholder sits.

        *mo* is a match object whose "invalid" group matched.  The reported
        line and column are 1-based and are computed from the start offset
        of that group within self.template.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(keepends=True)
        if not lines:
            # Nothing precedes the offset: report the very first position.
            colno = 1
            lineno = 1
        else:
            # Column = offset minus the length of all complete lines before
            # the one that contains the offset.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, mapping=_sentinel_dict, /, **kws):
        """Return the template text with every placeholder replaced.

        Values are looked up in *mapping* and/or the keyword arguments; when
        both are given, the keyword arguments take precedence.  Each value
        is converted with str().  Two consecutive delimiters produce a
        single delimiter.

        Lookup errors from the mapping (KeyError for a dict) propagate to
        the caller.  An ill-formed placeholder raises ValueError.
        """
        if mapping is _sentinel_dict:
            mapping = kws
        elif kws:
            # Keyword arguments shadow entries of the positional mapping.
            mapping = _ChainMap(kws, mapping)
        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            # Only reachable when none of the four expected groups matched.
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, mapping=_sentinel_dict, /, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        A placeholder whose lookup raises KeyError, and any ill-formed
        placeholder, is copied to the result unchanged instead of raising.
        Values are looked up in *mapping* and/or the keyword arguments, with
        keyword arguments taking precedence.
        """
        if mapping is _sentinel_dict:
            mapping = kws
        elif kws:
            # Keyword arguments shadow entries of the positional mapping.
            mapping = _ChainMap(kws, mapping)
        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    # Unknown name: keep the original placeholder text.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            # Only reachable when none of the four expected groups matched.
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

# Initialize Template.pattern.  __init_subclass__() is automatically called
# only for subclasses, not for the Template class itself.
Template.__init_subclass__()
147
148
149########################################################################
150# the Formatter class
151# see PEP 3101 for details and purpose of this class
152
# The hard parts are reused from the C implementation.  They're exposed by
# the private "_string" module.
155
156# The overall parser is implemented in _string.formatter_parser.
157# The field name parser is implemented in _string.formatter_field_name_split
158
159class Formatter:
160    def format(self, format_string, /, *args, **kwargs):
161        return self.vformat(format_string, args, kwargs)
162
163    def vformat(self, format_string, args, kwargs):
164        used_args = set()
165        result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
166        self.check_unused_args(used_args, args, kwargs)
167        return result
168
169    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
170                 auto_arg_index=0):
171        if recursion_depth < 0:
172            raise ValueError('Max string recursion exceeded')
173        result = []
174        for literal_text, field_name, format_spec, conversion in \
175                self.parse(format_string):
176
177            # output the literal text
178            if literal_text:
179                result.append(literal_text)
180
181            # if there's a field, output it
182            if field_name is not None:
183                # this is some markup, find the object and do
184                #  the formatting
185
186                # handle arg indexing when empty field_names are given.
187                if field_name == '':
188                    if auto_arg_index is False:
189                        raise ValueError('cannot switch from manual field '
190                                         'specification to automatic field '
191                                         'numbering')
192                    field_name = str(auto_arg_index)
193                    auto_arg_index += 1
194                elif field_name.isdigit():
195                    if auto_arg_index:
196                        raise ValueError('cannot switch from manual field '
197                                         'specification to automatic field '
198                                         'numbering')
199                    # disable auto arg incrementing, if it gets
200                    # used later on, then an exception will be raised
201                    auto_arg_index = False
202
203                # given the field_name, find the object it references
204                #  and the argument it came from
205                obj, arg_used = self.get_field(field_name, args, kwargs)
206                used_args.add(arg_used)
207
208                # do any conversion on the resulting object
209                obj = self.convert_field(obj, conversion)
210
211                # expand the format spec, if needed
212                format_spec, auto_arg_index = self._vformat(
213                    format_spec, args, kwargs,
214                    used_args, recursion_depth-1,
215                    auto_arg_index=auto_arg_index)
216
217                # format the object and append to the result
218                result.append(self.format_field(obj, format_spec))
219
220        return ''.join(result), auto_arg_index
221
222
223    def get_value(self, key, args, kwargs):
224        if isinstance(key, int):
225            return args[key]
226        else:
227            return kwargs[key]
228
229
230    def check_unused_args(self, used_args, args, kwargs):
231        pass
232
233
234    def format_field(self, value, format_spec):
235        return format(value, format_spec)
236
237
238    def convert_field(self, value, conversion):
239        # do any conversion on the resulting object
240        if conversion is None:
241            return value
242        elif conversion == 's':
243            return str(value)
244        elif conversion == 'r':
245            return repr(value)
246        elif conversion == 'a':
247            return ascii(value)
248        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))
249
250
251    # returns an iterable that contains tuples of the form:
252    # (literal_text, field_name, format_spec, conversion)
253    # literal_text can be zero length
254    # field_name can be None, in which case there's no
255    #  object to format and output
256    # if field_name is not None, it is looked up, formatted
257    #  with format_spec and conversion and then used
258    def parse(self, format_string):
259        return _string.formatter_parser(format_string)
260
261
262    # given a field_name, find the object it references.
263    #  field_name:   the field being looked up, e.g. "0.name"
264    #                 or "lookup[3]"
265    #  used_args:    a set of which args have been used
266    #  args, kwargs: as passed in to vformat
267    def get_field(self, field_name, args, kwargs):
268        first, rest = _string.formatter_field_name_split(field_name)
269
270        obj = self.get_value(first, args, kwargs)
271
272        # loop through the rest of the field_name, doing
273        #  getattr or getitem as needed
274        for is_attr, i in rest:
275            if is_attr:
276                obj = getattr(obj, i)
277            else:
278                obj = obj[i]
279
280        return obj, first
281