"""This will be the home for the policy that hooks in the new
code that adds all the email6 features.
"""

import re
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
from email.utils import _has_surrogates
from email.headerregistry import HeaderRegistry as HeaderRegistry
from email.contentmanager import raw_data_manager
from email.message import EmailMessage

__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]

# Splits on bare CR or LF only; str.splitlines() would also split on other
# characters (form feed, NEL, U+2028, ...), which header_fetch_parse must not
# treat as line separators.
linesep_splitter = re.compile(r'\n|\r')


def _ascii_serializable(value):
    """Return True if value can be encoded to ASCII with surrogateescape.

    That is the case when every character is either plain ASCII or a
    surrogate-escaped byte.  A False result means the value contains real
    non-ASCII text, which cannot be written to an ASCII-only header without
    first being turned into encoded words.
    """
    try:
        value.encode('ascii', 'surrogateescape')
    except UnicodeEncodeError:
        return False
    return True


@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all   -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # The base policy rejects ordinary attribute assignment, so the
            # per-instance factory is installed through object.__setattr__.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines())>1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines using splitlines.  If the value is
        not to be refolded, the lines are rejoined using the linesep from the
        policy and returned.  The exception is lines containing non-ascii
        binary data.  In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.  Likewise, if utf8 is false, a source
        value containing non-ASCII text is always refolded so that the text is
        rendered as encoded words.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Return the folded string form of header 'name' with 'value'.

        Header objects (anything with a 'name' attribute) fold themselves.
        Source strings are either re-emitted with their original line
        structure, joined by the policy linesep, or refolded through
        header_factory.  A source string is refolded when:

          * refold_source requires it ('all', or 'long' with an over-long
            line);
          * refold_binary is true and the value carries surrogate-escaped
            bytes; or
          * utf8 is false and the value holds non-ASCII text that could not
            otherwise be serialized as ASCII.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A max_line_length of 0 or None means "no limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        lines = value.splitlines()
        # The first line shares its physical line with "name: ", hence the
        # extra len(name) + 2 when comparing against the limit.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if not refold:
            if refold_binary and _has_surrogates(value):
                refold = True
            elif not self.utf8 and not _ascii_serializable(value):
                # Without this, fold() would emit raw non-ASCII text and
                # fold_binary() would raise UnicodeEncodeError when encoding
                # the result to ASCII.
                refold = True
        if refold:
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep


default = EmailPolicy()
# Make the default policy use the class default header_factory
del default.header_factory
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)
SMTPUTF8 = SMTP.clone(utf8=True)