• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""This will be the home for the policy that hooks in the new
2code that adds all the email6 features.
3"""
4
5import re
6from email._policybase import Policy, Compat32, compat32, _extend_docstrings
7from email.utils import _has_surrogates
8from email.headerregistry import HeaderRegistry as HeaderRegistry
9from email.contentmanager import raw_data_manager
10from email.message import EmailMessage
11
# Names exported by ``from email.policy import *``.  Every public policy
# class and every ready-made policy instance defined in this module is listed,
# including SMTPUTF8, which is created at the bottom of the file.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
22
# Matches one line break as understood by the email RFCs: CRLF, a bare CR or
# a bare LF.  CRLF is listed first so that the pair is consumed as a single
# separator instead of producing an empty piece between the CR and the LF.
linesep_splitter = re.compile(r'\r\n|\r|\n')


@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        # object.__setattr__ is called directly so the attribute is stored
        # on the instance before the base class initializer runs.
        if 'header_factory' not in kw:
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value is a
        string that str.splitlines() breaks into more than one line.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines()) > 1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines on CRLF, CR and LF only.  If the
        value is not to be refolded, the lines are rejoined using the linesep
        from the policy and returned.  The exception is lines containing
        non-ascii binary data.  In that case the value is refolded regardless
        of the refold_source setting, which causes the binary data to be CTE
        encoded using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Shared implementation of fold and fold_binary; returns a str.

        name is the header field name and value is either a header object
        (anything with a 'name' attribute) or a source string.  Header
        objects fold themselves.  A source string is refolded through
        header_factory when refold_source requires it, or when refold_binary
        is true and the string contains surrogateescaped bytes; otherwise its
        lines are rejoined with the policy linesep.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (None or 0) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        # We can't use splitlines here because it splits on more than \r and
        # \n (VT, FF, NEL, U+2028, ...).  Those characters are header data,
        # not line breaks: splitting on them would either emit a broken
        # continuation line or silently drop the character when refolding.
        lines = linesep_splitter.split(value)
        # re.split always yields at least one piece.  Drop the empty piece
        # left after a trailing line break (or for an empty value) so that
        # CR/LF-only input produces exactly what splitlines would.
        if not lines[-1]:
            lines.pop()
        # The first line shares its physical line with "name: ", hence the
        # extra len(name)+2 when it is measured against the limit.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if refold or refold_binary and _has_surrogates(value):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
215
216
# Ready-made policy instances.  ``default`` must be created first: every
# other instance below is derived from it (directly, or via SMTP) by clone().
default = EmailPolicy()
# Make the default policy use the class default header_factory
# (EmailPolicy.__init__ stores a fresh HeaderRegistry on the instance when
# none is passed in; deleting that instance attribute lets attribute lookup
# fall back to the registry defined on the class).  This has to happen
# before the clones are made so it is reflected in them as well.
del default.header_factory
# Same as default, but created with raise_on_defect=True.
strict = default.clone(raise_on_defect=True)
# Same as default, but lines are terminated with CRLF.
SMTP = default.clone(linesep='\r\n')
# CRLF line endings and max_line_length=None (no line length limit is
# applied when folding source values).
HTTP = default.clone(linesep='\r\n', max_line_length=None)
# Same as SMTP, but with utf8=True.
SMTPUTF8 = SMTP.clone(utf8=True)
224