"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
from types import MappingProxyType

from email import utils
from email import errors
from email import _header_value_parser as parser


class Address:

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises ValueError if any part contains CR or LF, or if addr_spec
        cannot be fully parsed; raises TypeError if addr_spec is combined
        with username and/or domain.

        """

        # Reject line breaks in any supplied part so that an address can never
        # introduce an extra header line when it is serialized.
        inputs = ''.join(filter(None, (display_name, username, domain, addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction (read-only)."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address (read-only)."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address (read-only)."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both username and domain are empty.
        """
        lp = self.username
        # Quote the local part if it contains any character that is not
        # permitted in an unquoted dot-atom.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        disp = self.display_name
        # A display name containing RFC 5322 specials must be quoted.
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # addr_spec already renders the empty address as '<>'; avoid
            # producing '<<>>' when wrapping it in angle brackets.
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        if not isinstance(other, Address):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)


class Group:

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None (read-only)."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the addresses in the group (read-only)."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis keeps the repr balanced, matching the
        # format used by Address.__repr__.
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)


# Header Classes #

class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        # The decoded value becomes the str value of the header object; the
        # remaining keys are handed to init() as keyword arguments.
        self = str.__new__(cls, kwds['decoded'])
        del kwds['decoded']
        self.init(name, **kwds)
        return self

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defects on the instance."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name as passed to the constructor."""
        return self._name

    @property
    def defects(self):
        """Tuple of the defects collected while parsing the value."""
        return tuple(self._defects)

    def __reduce__(self):
        # Header classes are created dynamically (see HeaderRegistry), so
        # pickling records the class name and bases needed to rebuild one.
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypass __new__/parse; instance state is restored from the pickled
        # __dict__ returned by __reduce__.
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            ])
        if self._parse_tree:
            header.append(
                parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
        header.append(self._parse_tree)
        return header.fold(policy=policy)


def _reconstruct_header(cls_name, bases, value):
    """Rebuild a pickled header by recreating its dynamic class."""
    return type(cls_name, bases, {})._reconstruct(value)


class UnstructuredHeader:

    """Header whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = cls.value_parser(value)
        kwds['decoded'] = str(kwds['parse_tree'])


class UniqueUnstructuredHeader(UnstructuredHeader):

    max_count = 1


class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the input is a string that cannot be parsed as a date, a defect is
    registered, datetime is None, and the value is the input string.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                parsed = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Header parsers should record defects rather than raise.
                # Depending on the Python version an unparseable date
                # surfaces as TypeError or ValueError.  InvalidDateDefect is
                # not available in every version, so fall back to the
                # generic HeaderDefect.
                defect_cls = getattr(errors, 'InvalidDateDefect',
                                     errors.HeaderDefect)
                kwds['defects'].append(
                    defect_cls('Invalid date value or format'))
                kwds['datetime'] = None
                # Keep the raw text so the header can still be displayed
                # and folded without losing the original value.
                kwds['decoded'] = value
                kwds['parse_tree'] = cls.value_parser(value)
                return
            value = parsed
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if the value was empty or invalid."""
        return self._datetime


class UniqueDateHeader(DateHeader):

    max_count = 1


class AddressHeader:

    """Header whose value is a list of addresses and/or groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        address_list, value = parser.get_address_list(value)
        assert not value, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            kwds['parse_tree'] = address_list = cls.value_parser(value)
            groups = []
            for addr in address_list.addresses:
                groups.append(Group(addr.display_name,
                                    [Address(mb.display_name or '',
                                             mb.local_part or '',
                                             mb.domain or '')
                                     for mb in addr.all_mailboxes]))
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            # Wrap bare Address objects in an anonymous Group so that the
            # groups list is homogeneous.
            groups = [Group(None, [item]) if not hasattr(item, 'addresses')
                                          else item
                                    for item in value]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(item) for item in groups])
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._groups = tuple(kw.pop('groups'))
        # Computed lazily by the 'addresses' property.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of Group objects found in the header value."""
        return self._groups

    @property
    def addresses(self):
        """Tuple of every Address in every group, in order."""
        if self._addresses is None:
            self._addresses = tuple(address for group in self._groups
                                            for address in group.addresses)
        return self._addresses


class UniqueAddressHeader(AddressHeader):

    max_count = 1


class SingleAddressHeader(AddressHeader):

    @property
    def address(self):
        """The single address in the header.

        Raises ValueError if the header does not contain exactly one address.
        """
        if len(self.addresses) != 1:
            raise ValueError(("value of single address header {} is not "
                "a single address").format(self.name))
        return self.addresses[0]


class UniqueSingleAddressHeader(SingleAddressHeader):

    max_count = 1


class MIMEVersionHeader:

    """Header carrying a MIME version, exposed as version/major/minor."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        # Without a minor number the version is treated as invalid, so all
        # three attributes are None.
        kwds['major'] = None if parse_tree.minor is None else parse_tree.major
        kwds['minor'] = parse_tree.minor
        if parse_tree.minor is not None:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version


class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        if parse_tree.params is None:
            kwds['params'] = {}
        else:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            kwds['params'] = {utils._sanitize(name).lower():
                                    utils._sanitize(value)
                               for name, value in parse_tree.params}

    def init(self, *args, **kw):
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping of lower-cased parameter names to values."""
        return MappingProxyType(self._params)


class ContentTypeHeader(ParameterizedMIMEHeader):

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        self._maintype = utils._sanitize(self._parse_tree.maintype)
        self._subtype = utils._sanitize(self._parse_tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        """The content type in 'maintype/subtype' form."""
        return self.maintype + '/' + self.subtype


class ContentDispositionHeader(ParameterizedMIMEHeader):

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        cd = self._parse_tree.content_disposition
        self._content_disposition = cd if cd is None else utils._sanitize(cd)

    @property
    def content_disposition(self):
        return self._content_disposition


class ContentTransferEncodingHeader:

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        return self._cte


class MessageIDHeader:

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)


# The header factory #

_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    'message-id':                   MessageIDHeader,
    }


class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        self.registry = {}
        self.base_class = base_class
        self.default_class = default_class
        if use_default_map:
            self.registry.update(_default_header_map)

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased, so lookups are case-insensitive.

        """
        self.registry[name.lower()] = cls

    def __getitem__(self, name):
        """Return a new header class for "name".

        The class combines the registered specialized class (or the
        default_class) with base_class.
        """
        cls = self.registry.get(name.lower(), self.default_class)
        return type('_'+cls.__name__, (cls, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        return self[name](name, value)