1# Copyright (C) 2001-2007 Python Software Foundation 2# Author: Barry Warsaw 3# Contact: email-sig@python.org 4 5"""Basic message object for the email package object model.""" 6 7__all__ = ['Message', 'EmailMessage'] 8 9import re 10import uu 11import quopri 12from io import BytesIO, StringIO 13 14# Intrapackage imports 15from email import utils 16from email import errors 17from email._policybase import Policy, compat32 18from email import charset as _charset 19from email._encoded_words import decode_b 20Charset = _charset.Charset 21 22SEMISPACE = '; ' 23 24# Regular expression that matches `special' characters in parameters, the 25# existence of which force quoting of the parameter value. 26tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') 27 28 29def _splitparam(param): 30 # Split header parameters. BAW: this may be too simple. It isn't 31 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers 32 # found in the wild. We may eventually need a full fledged parser. 33 # RDM: we might have a Header here; for now just stringify it. 34 a, sep, b = str(param).partition(';') 35 if not sep: 36 return a.strip(), None 37 return a.strip(), b.strip() 38 39def _formatparam(param, value=None, quote=True): 40 """Convenience function to format and return a key=value pair. 41 42 This will quote the value if needed or if quote is true. If value is a 43 three tuple (charset, language, value), it will be encoded according 44 to RFC2231 rules. If it contains non-ascii characters it will likewise 45 be encoded according to RFC2231 rules, using the utf-8 charset and 46 a null language. 47 """ 48 if value is not None and len(value) > 0: 49 # A tuple is used for RFC 2231 encoded parameter values where items 50 # are (charset, language, value). charset is a string, not a Charset 51 # instance. RFC 2231 encoded values are never quoted, per RFC. 52 if isinstance(value, tuple): 53 # Encode as per RFC 2231 54 param += '*' 55 value = utils.encode_rfc2231(value[2], value[0], value[1]) 56 return '%s=%s' % (param, value) 57 else: 58 try: 59 value.encode('ascii') 60 except UnicodeEncodeError: 61 param += '*' 62 value = utils.encode_rfc2231(value, 'utf-8', '') 63 return '%s=%s' % (param, value) 64 # BAW: Please check this. I think that if quote is set it should 65 # force quoting even if not necessary. 66 if quote or tspecials.search(value): 67 return '%s="%s"' % (param, utils.quote(value)) 68 else: 69 return '%s=%s' % (param, value) 70 else: 71 return param 72 73def _parseparam(s): 74 # RDM This might be a Header, so for now stringify it. 75 s = ';' + str(s) 76 plist = [] 77 while s[:1] == ';': 78 s = s[1:] 79 end = s.find(';') 80 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: 81 end = s.find(';', end + 1) 82 if end < 0: 83 end = len(s) 84 f = s[:end] 85 if '=' in f: 86 i = f.index('=') 87 f = f[:i].strip().lower() + '=' + f[i+1:].strip() 88 plist.append(f.strip()) 89 s = s[end:] 90 return plist 91 92 93def _unquotevalue(value): 94 # This is different than utils.collapse_rfc2231_value() because it doesn't 95 # try to convert the value to a unicode. Message.get_param() and 96 # Message.get_params() are both currently defined to return the tuple in 97 # the face of RFC 2231 parameters. 98 if isinstance(value, tuple): 99 return value[0], value[1], utils.unquote(value[2]) 100 else: 101 return utils.unquote(value) 102 103 104 105class Message: 106 """Basic message object. 107 108 A message object is defined as something that has a bunch of RFC 2822 109 headers and a payload. It may optionally have an envelope header 110 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a 111 multipart or a message/rfc822), then the payload is a list of Message 112 objects, otherwise it is a string. 113 114 Message objects implement part of the `mapping' interface, which assumes 115 there is exactly one occurrence of the header per message. Some headers 116 do in fact appear multiple times (e.g. Received) and for those headers, 117 you must use the explicit API to set or get all the headers. Not all of 118 the mapping methods are implemented. 119 """ 120 def __init__(self, policy=compat32): 121 self.policy = policy 122 self._headers = [] 123 self._unixfrom = None 124 self._payload = None 125 self._charset = None 126 # Defaults for multipart messages 127 self.preamble = self.epilogue = None 128 self.defects = [] 129 # Default content type 130 self._default_type = 'text/plain' 131 132 def __str__(self): 133 """Return the entire formatted message as a string. 134 """ 135 return self.as_string() 136 137 def as_string(self, unixfrom=False, maxheaderlen=0, policy=None): 138 """Return the entire formatted message as a string. 139 140 Optional 'unixfrom', when true, means include the Unix From_ envelope 141 header. For backward compatibility reasons, if maxheaderlen is 142 not specified it defaults to 0, so you must override it explicitly 143 if you want a different maxheaderlen. 'policy' is passed to the 144 Generator instance used to serialize the message; if it is not 145 specified the policy associated with the message instance is used. 146 147 If the message object contains binary data that is not encoded 148 according to RFC standards, the non-compliant data will be replaced by 149 unicode "unknown character" code points. 150 """ 151 from email.generator import Generator 152 policy = self.policy if policy is None else policy 153 fp = StringIO() 154 g = Generator(fp, 155 mangle_from_=False, 156 maxheaderlen=maxheaderlen, 157 policy=policy) 158 g.flatten(self, unixfrom=unixfrom) 159 return fp.getvalue() 160 161 def __bytes__(self): 162 """Return the entire formatted message as a bytes object. 163 """ 164 return self.as_bytes() 165 166 def as_bytes(self, unixfrom=False, policy=None): 167 """Return the entire formatted message as a bytes object. 168 169 Optional 'unixfrom', when true, means include the Unix From_ envelope 170 header. 'policy' is passed to the BytesGenerator instance used to 171 serialize the message; if not specified the policy associated with 172 the message instance is used. 173 """ 174 from email.generator import BytesGenerator 175 policy = self.policy if policy is None else policy 176 fp = BytesIO() 177 g = BytesGenerator(fp, mangle_from_=False, policy=policy) 178 g.flatten(self, unixfrom=unixfrom) 179 return fp.getvalue() 180 181 def is_multipart(self): 182 """Return True if the message consists of multiple parts.""" 183 return isinstance(self._payload, list) 184 185 # 186 # Unix From_ line 187 # 188 def set_unixfrom(self, unixfrom): 189 self._unixfrom = unixfrom 190 191 def get_unixfrom(self): 192 return self._unixfrom 193 194 # 195 # Payload manipulation. 196 # 197 def attach(self, payload): 198 """Add the given payload to the current payload. 199 200 The current payload will always be a list of objects after this method 201 is called. If you want to set the payload to a scalar object, use 202 set_payload() instead. 203 """ 204 if self._payload is None: 205 self._payload = [payload] 206 else: 207 try: 208 self._payload.append(payload) 209 except AttributeError: 210 raise TypeError("Attach is not valid on a message with a" 211 " non-multipart payload") 212 213 def get_payload(self, i=None, decode=False): 214 """Return a reference to the payload. 215 216 The payload will either be a list object or a string. If you mutate 217 the list object, you modify the message's payload in place. Optional 218 i returns that index into the payload. 219 220 Optional decode is a flag indicating whether the payload should be 221 decoded or not, according to the Content-Transfer-Encoding header 222 (default is False). 223 224 When True and the message is not a multipart, the payload will be 225 decoded if this header's value is `quoted-printable' or `base64'. If 226 some other encoding is used, or the header is missing, or if the 227 payload has bogus data (i.e. bogus base64 or uuencoded data), the 228 payload is returned as-is. 229 230 If the message is a multipart and the decode flag is True, then None 231 is returned. 232 """ 233 # Here is the logic table for this code, based on the email5.0.0 code: 234 # i decode is_multipart result 235 # ------ ------ ------------ ------------------------------ 236 # None True True None 237 # i True True None 238 # None False True _payload (a list) 239 # i False True _payload element i (a Message) 240 # i False False error (not a list) 241 # i True False error (not a list) 242 # None False False _payload 243 # None True False _payload decoded (bytes) 244 # Note that Barry planned to factor out the 'decode' case, but that 245 # isn't so easy now that we handle the 8 bit data, which needs to be 246 # converted in both the decode and non-decode path. 247 if self.is_multipart(): 248 if decode: 249 return None 250 if i is None: 251 return self._payload 252 else: 253 return self._payload[i] 254 # For backward compatibility, Use isinstance and this error message 255 # instead of the more logical is_multipart test. 256 if i is not None and not isinstance(self._payload, list): 257 raise TypeError('Expected list, got %s' % type(self._payload)) 258 payload = self._payload 259 # cte might be a Header, so for now stringify it. 260 cte = str(self.get('content-transfer-encoding', '')).lower() 261 # payload may be bytes here. 262 if isinstance(payload, str): 263 if utils._has_surrogates(payload): 264 bpayload = payload.encode('ascii', 'surrogateescape') 265 if not decode: 266 try: 267 payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') 268 except LookupError: 269 payload = bpayload.decode('ascii', 'replace') 270 elif decode: 271 try: 272 bpayload = payload.encode('ascii') 273 except UnicodeError: 274 # This won't happen for RFC compliant messages (messages 275 # containing only ASCII code points in the unicode input). 276 # If it does happen, turn the string into bytes in a way 277 # guaranteed not to fail. 278 bpayload = payload.encode('raw-unicode-escape') 279 if not decode: 280 return payload 281 if cte == 'quoted-printable': 282 return quopri.decodestring(bpayload) 283 elif cte == 'base64': 284 # XXX: this is a bit of a hack; decode_b should probably be factored 285 # out somewhere, but I haven't figured out where yet. 286 value, defects = decode_b(b''.join(bpayload.splitlines())) 287 for defect in defects: 288 self.policy.handle_defect(self, defect) 289 return value 290 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): 291 in_file = BytesIO(bpayload) 292 out_file = BytesIO() 293 try: 294 uu.decode(in_file, out_file, quiet=True) 295 return out_file.getvalue() 296 except uu.Error: 297 # Some decoding problem 298 return bpayload 299 if isinstance(payload, str): 300 return bpayload 301 return payload 302 303 def set_payload(self, payload, charset=None): 304 """Set the payload to the given value. 305 306 Optional charset sets the message's default character set. See 307 set_charset() for details. 308 """ 309 if hasattr(payload, 'encode'): 310 if charset is None: 311 self._payload = payload 312 return 313 if not isinstance(charset, Charset): 314 charset = Charset(charset) 315 payload = payload.encode(charset.output_charset) 316 if hasattr(payload, 'decode'): 317 self._payload = payload.decode('ascii', 'surrogateescape') 318 else: 319 self._payload = payload 320 if charset is not None: 321 self.set_charset(charset) 322 323 def set_charset(self, charset): 324 """Set the charset of the payload to a given character set. 325 326 charset can be a Charset instance, a string naming a character set, or 327 None. If it is a string it will be converted to a Charset instance. 328 If charset is None, the charset parameter will be removed from the 329 Content-Type field. Anything else will generate a TypeError. 330 331 The message will be assumed to be of type text/* encoded with 332 charset.input_charset. It will be converted to charset.output_charset 333 and encoded properly, if needed, when generating the plain text 334 representation of the message. MIME headers (MIME-Version, 335 Content-Type, Content-Transfer-Encoding) will be added as needed. 336 """ 337 if charset is None: 338 self.del_param('charset') 339 self._charset = None 340 return 341 if not isinstance(charset, Charset): 342 charset = Charset(charset) 343 self._charset = charset 344 if 'MIME-Version' not in self: 345 self.add_header('MIME-Version', '1.0') 346 if 'Content-Type' not in self: 347 self.add_header('Content-Type', 'text/plain', 348 charset=charset.get_output_charset()) 349 else: 350 self.set_param('charset', charset.get_output_charset()) 351 if charset != charset.get_output_charset(): 352 self._payload = charset.body_encode(self._payload) 353 if 'Content-Transfer-Encoding' not in self: 354 cte = charset.get_body_encoding() 355 try: 356 cte(self) 357 except TypeError: 358 # This 'if' is for backward compatibility, it allows unicode 359 # through even though that won't work correctly if the 360 # message is serialized. 361 payload = self._payload 362 if payload: 363 try: 364 payload = payload.encode('ascii', 'surrogateescape') 365 except UnicodeError: 366 payload = payload.encode(charset.output_charset) 367 self._payload = charset.body_encode(payload) 368 self.add_header('Content-Transfer-Encoding', cte) 369 370 def get_charset(self): 371 """Return the Charset instance associated with the message's payload. 372 """ 373 return self._charset 374 375 # 376 # MAPPING INTERFACE (partial) 377 # 378 def __len__(self): 379 """Return the total number of headers, including duplicates.""" 380 return len(self._headers) 381 382 def __getitem__(self, name): 383 """Get a header value. 384 385 Return None if the header is missing instead of raising an exception. 386 387 Note that if the header appeared multiple times, exactly which 388 occurrence gets returned is undefined. Use get_all() to get all 389 the values matching a header field name. 390 """ 391 return self.get(name) 392 393 def __setitem__(self, name, val): 394 """Set the value of a header. 395 396 Note: this does not overwrite an existing header with the same field 397 name. Use __delitem__() first to delete any existing headers. 398 """ 399 max_count = self.policy.header_max_count(name) 400 if max_count: 401 lname = name.lower() 402 found = 0 403 for k, v in self._headers: 404 if k.lower() == lname: 405 found += 1 406 if found >= max_count: 407 raise ValueError("There may be at most {} {} headers " 408 "in a message".format(max_count, name)) 409 self._headers.append(self.policy.header_store_parse(name, val)) 410 411 def __delitem__(self, name): 412 """Delete all occurrences of a header, if present. 413 414 Does not raise an exception if the header is missing. 415 """ 416 name = name.lower() 417 newheaders = [] 418 for k, v in self._headers: 419 if k.lower() != name: 420 newheaders.append((k, v)) 421 self._headers = newheaders 422 423 def __contains__(self, name): 424 return name.lower() in [k.lower() for k, v in self._headers] 425 426 def __iter__(self): 427 for field, value in self._headers: 428 yield field 429 430 def keys(self): 431 """Return a list of all the message's header field names. 432 433 These will be sorted in the order they appeared in the original 434 message, or were added to the message, and may contain duplicates. 435 Any fields deleted and re-inserted are always appended to the header 436 list. 437 """ 438 return [k for k, v in self._headers] 439 440 def values(self): 441 """Return a list of all the message's header values. 442 443 These will be sorted in the order they appeared in the original 444 message, or were added to the message, and may contain duplicates. 445 Any fields deleted and re-inserted are always appended to the header 446 list. 447 """ 448 return [self.policy.header_fetch_parse(k, v) 449 for k, v in self._headers] 450 451 def items(self): 452 """Get all the message's header fields and values. 453 454 These will be sorted in the order they appeared in the original 455 message, or were added to the message, and may contain duplicates. 456 Any fields deleted and re-inserted are always appended to the header 457 list. 458 """ 459 return [(k, self.policy.header_fetch_parse(k, v)) 460 for k, v in self._headers] 461 462 def get(self, name, failobj=None): 463 """Get a header value. 464 465 Like __getitem__() but return failobj instead of None when the field 466 is missing. 467 """ 468 name = name.lower() 469 for k, v in self._headers: 470 if k.lower() == name: 471 return self.policy.header_fetch_parse(k, v) 472 return failobj 473 474 # 475 # "Internal" methods (public API, but only intended for use by a parser 476 # or generator, not normal application code. 477 # 478 479 def set_raw(self, name, value): 480 """Store name and value in the model without modification. 481 482 This is an "internal" API, intended only for use by a parser. 483 """ 484 self._headers.append((name, value)) 485 486 def raw_items(self): 487 """Return the (name, value) header pairs without modification. 488 489 This is an "internal" API, intended only for use by a generator. 490 """ 491 return iter(self._headers.copy()) 492 493 # 494 # Additional useful stuff 495 # 496 497 def get_all(self, name, failobj=None): 498 """Return a list of all the values for the named field. 499 500 These will be sorted in the order they appeared in the original 501 message, and may contain duplicates. Any fields deleted and 502 re-inserted are always appended to the header list. 503 504 If no such fields exist, failobj is returned (defaults to None). 505 """ 506 values = [] 507 name = name.lower() 508 for k, v in self._headers: 509 if k.lower() == name: 510 values.append(self.policy.header_fetch_parse(k, v)) 511 if not values: 512 return failobj 513 return values 514 515 def add_header(self, _name, _value, **_params): 516 """Extended header setting. 517 518 name is the header field to add. keyword arguments can be used to set 519 additional parameters for the header field, with underscores converted 520 to dashes. Normally the parameter will be added as key="value" unless 521 value is None, in which case only the key will be added. If a 522 parameter value contains non-ASCII characters it can be specified as a 523 three-tuple of (charset, language, value), in which case it will be 524 encoded according to RFC2231 rules. Otherwise it will be encoded using 525 the utf-8 charset and a language of ''. 526 527 Examples: 528 529 msg.add_header('content-disposition', 'attachment', filename='bud.gif') 530 msg.add_header('content-disposition', 'attachment', 531 filename=('utf-8', '', Fußballer.ppt')) 532 msg.add_header('content-disposition', 'attachment', 533 filename='Fußballer.ppt')) 534 """ 535 parts = [] 536 for k, v in _params.items(): 537 if v is None: 538 parts.append(k.replace('_', '-')) 539 else: 540 parts.append(_formatparam(k.replace('_', '-'), v)) 541 if _value is not None: 542 parts.insert(0, _value) 543 self[_name] = SEMISPACE.join(parts) 544 545 def replace_header(self, _name, _value): 546 """Replace a header. 547 548 Replace the first matching header found in the message, retaining 549 header order and case. If no matching header was found, a KeyError is 550 raised. 551 """ 552 _name = _name.lower() 553 for i, (k, v) in zip(range(len(self._headers)), self._headers): 554 if k.lower() == _name: 555 self._headers[i] = self.policy.header_store_parse(k, _value) 556 break 557 else: 558 raise KeyError(_name) 559 560 # 561 # Use these three methods instead of the three above. 562 # 563 564 def get_content_type(self): 565 """Return the message's content type. 566 567 The returned string is coerced to lower case of the form 568 `maintype/subtype'. If there was no Content-Type header in the 569 message, the default type as given by get_default_type() will be 570 returned. Since according to RFC 2045, messages always have a default 571 type this will always return a value. 572 573 RFC 2045 defines a message's default type to be text/plain unless it 574 appears inside a multipart/digest container, in which case it would be 575 message/rfc822. 576 """ 577 missing = object() 578 value = self.get('content-type', missing) 579 if value is missing: 580 # This should have no parameters 581 return self.get_default_type() 582 ctype = _splitparam(value)[0].lower() 583 # RFC 2045, section 5.2 says if its invalid, use text/plain 584 if ctype.count('/') != 1: 585 return 'text/plain' 586 return ctype 587 588 def get_content_maintype(self): 589 """Return the message's main content type. 590 591 This is the `maintype' part of the string returned by 592 get_content_type(). 593 """ 594 ctype = self.get_content_type() 595 return ctype.split('/')[0] 596 597 def get_content_subtype(self): 598 """Returns the message's sub-content type. 599 600 This is the `subtype' part of the string returned by 601 get_content_type(). 602 """ 603 ctype = self.get_content_type() 604 return ctype.split('/')[1] 605 606 def get_default_type(self): 607 """Return the `default' content type. 608 609 Most messages have a default content type of text/plain, except for 610 messages that are subparts of multipart/digest containers. Such 611 subparts have a default content type of message/rfc822. 612 """ 613 return self._default_type 614 615 def set_default_type(self, ctype): 616 """Set the `default' content type. 617 618 ctype should be either "text/plain" or "message/rfc822", although this 619 is not enforced. The default content type is not stored in the 620 Content-Type header. 621 """ 622 self._default_type = ctype 623 624 def _get_params_preserve(self, failobj, header): 625 # Like get_params() but preserves the quoting of values. BAW: 626 # should this be part of the public interface? 627 missing = object() 628 value = self.get(header, missing) 629 if value is missing: 630 return failobj 631 params = [] 632 for p in _parseparam(value): 633 try: 634 name, val = p.split('=', 1) 635 name = name.strip() 636 val = val.strip() 637 except ValueError: 638 # Must have been a bare attribute 639 name = p.strip() 640 val = '' 641 params.append((name, val)) 642 params = utils.decode_params(params) 643 return params 644 645 def get_params(self, failobj=None, header='content-type', unquote=True): 646 """Return the message's Content-Type parameters, as a list. 647 648 The elements of the returned list are 2-tuples of key/value pairs, as 649 split on the `=' sign. The left hand side of the `=' is the key, 650 while the right hand side is the value. If there is no `=' sign in 651 the parameter the value is the empty string. The value is as 652 described in the get_param() method. 653 654 Optional failobj is the object to return if there is no Content-Type 655 header. Optional header is the header to search instead of 656 Content-Type. If unquote is True, the value is unquoted. 657 """ 658 missing = object() 659 params = self._get_params_preserve(missing, header) 660 if params is missing: 661 return failobj 662 if unquote: 663 return [(k, _unquotevalue(v)) for k, v in params] 664 else: 665 return params 666 667 def get_param(self, param, failobj=None, header='content-type', 668 unquote=True): 669 """Return the parameter value if found in the Content-Type header. 670 671 Optional failobj is the object to return if there is no Content-Type 672 header, or the Content-Type header has no such parameter. Optional 673 header is the header to search instead of Content-Type. 674 675 Parameter keys are always compared case insensitively. The return 676 value can either be a string, or a 3-tuple if the parameter was RFC 677 2231 encoded. When it's a 3-tuple, the elements of the value are of 678 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and 679 LANGUAGE can be None, in which case you should consider VALUE to be 680 encoded in the us-ascii charset. You can usually ignore LANGUAGE. 681 The parameter value (either the returned string, or the VALUE item in 682 the 3-tuple) is always unquoted, unless unquote is set to False. 683 684 If your application doesn't care whether the parameter was RFC 2231 685 encoded, it can turn the return value into a string as follows: 686 687 rawparam = msg.get_param('foo') 688 param = email.utils.collapse_rfc2231_value(rawparam) 689 690 """ 691 if header not in self: 692 return failobj 693 for k, v in self._get_params_preserve(failobj, header): 694 if k.lower() == param.lower(): 695 if unquote: 696 return _unquotevalue(v) 697 else: 698 return v 699 return failobj 700 701 def set_param(self, param, value, header='Content-Type', requote=True, 702 charset=None, language='', replace=False): 703 """Set a parameter in the Content-Type header. 704 705 If the parameter already exists in the header, its value will be 706 replaced with the new value. 707 708 If header is Content-Type and has not yet been defined for this 709 message, it will be set to "text/plain" and the new parameter and 710 value will be appended as per RFC 2045. 711 712 An alternate header can be specified in the header argument, and all 713 parameters will be quoted as necessary unless requote is False. 714 715 If charset is specified, the parameter will be encoded according to RFC 716 2231. Optional language specifies the RFC 2231 language, defaulting 717 to the empty string. Both charset and language should be strings. 718 """ 719 if not isinstance(value, tuple) and charset: 720 value = (charset, language, value) 721 722 if header not in self and header.lower() == 'content-type': 723 ctype = 'text/plain' 724 else: 725 ctype = self.get(header) 726 if not self.get_param(param, header=header): 727 if not ctype: 728 ctype = _formatparam(param, value, requote) 729 else: 730 ctype = SEMISPACE.join( 731 [ctype, _formatparam(param, value, requote)]) 732 else: 733 ctype = '' 734 for old_param, old_value in self.get_params(header=header, 735 unquote=requote): 736 append_param = '' 737 if old_param.lower() == param.lower(): 738 append_param = _formatparam(param, value, requote) 739 else: 740 append_param = _formatparam(old_param, old_value, requote) 741 if not ctype: 742 ctype = append_param 743 else: 744 ctype = SEMISPACE.join([ctype, append_param]) 745 if ctype != self.get(header): 746 if replace: 747 self.replace_header(header, ctype) 748 else: 749 del self[header] 750 self[header] = ctype 751 752 def del_param(self, param, header='content-type', requote=True): 753 """Remove the given parameter completely from the Content-Type header. 754 755 The header will be re-written in place without the parameter or its 756 value. All values will be quoted as necessary unless requote is 757 False. Optional header specifies an alternative to the Content-Type 758 header. 759 """ 760 if header not in self: 761 return 762 new_ctype = '' 763 for p, v in self.get_params(header=header, unquote=requote): 764 if p.lower() != param.lower(): 765 if not new_ctype: 766 new_ctype = _formatparam(p, v, requote) 767 else: 768 new_ctype = SEMISPACE.join([new_ctype, 769 _formatparam(p, v, requote)]) 770 if new_ctype != self.get(header): 771 del self[header] 772 self[header] = new_ctype 773 774 def set_type(self, type, header='Content-Type', requote=True): 775 """Set the main type and subtype for the Content-Type header. 776 777 type must be a string in the form "maintype/subtype", otherwise a 778 ValueError is raised. 779 780 This method replaces the Content-Type header, keeping all the 781 parameters in place. If requote is False, this leaves the existing 782 header's quoting as is. Otherwise, the parameters will be quoted (the 783 default). 784 785 An alternative header can be specified in the header argument. When 786 the Content-Type header is set, we'll always also add a MIME-Version 787 header. 788 """ 789 # BAW: should we be strict? 790 if not type.count('/') == 1: 791 raise ValueError 792 # Set the Content-Type, you get a MIME-Version 793 if header.lower() == 'content-type': 794 del self['mime-version'] 795 self['MIME-Version'] = '1.0' 796 if header not in self: 797 self[header] = type 798 return 799 params = self.get_params(header=header, unquote=requote) 800 del self[header] 801 self[header] = type 802 # Skip the first param; it's the old type. 803 for p, v in params[1:]: 804 self.set_param(p, v, header, requote) 805 806 def get_filename(self, failobj=None): 807 """Return the filename associated with the payload if present. 808 809 The filename is extracted from the Content-Disposition header's 810 `filename' parameter, and it is unquoted. If that header is missing 811 the `filename' parameter, this method falls back to looking for the 812 `name' parameter. 813 """ 814 missing = object() 815 filename = self.get_param('filename', missing, 'content-disposition') 816 if filename is missing: 817 filename = self.get_param('name', missing, 'content-type') 818 if filename is missing: 819 return failobj 820 return utils.collapse_rfc2231_value(filename).strip() 821 822 def get_boundary(self, failobj=None): 823 """Return the boundary associated with the payload if present. 824 825 The boundary is extracted from the Content-Type header's `boundary' 826 parameter, and it is unquoted. 827 """ 828 missing = object() 829 boundary = self.get_param('boundary', missing) 830 if boundary is missing: 831 return failobj 832 # RFC 2046 says that boundaries may begin but not end in w/s 833 return utils.collapse_rfc2231_value(boundary).rstrip() 834 835 def set_boundary(self, boundary): 836 """Set the boundary parameter in Content-Type to 'boundary'. 837 838 This is subtly different than deleting the Content-Type header and 839 adding a new one with a new boundary parameter via add_header(). The 840 main difference is that using the set_boundary() method preserves the 841 order of the Content-Type header in the original message. 842 843 HeaderParseError is raised if the message has no Content-Type header. 844 """ 845 missing = object() 846 params = self._get_params_preserve(missing, 'content-type') 847 if params is missing: 848 # There was no Content-Type header, and we don't know what type 849 # to set it to, so raise an exception. 850 raise errors.HeaderParseError('No Content-Type header found') 851 newparams = [] 852 foundp = False 853 for pk, pv in params: 854 if pk.lower() == 'boundary': 855 newparams.append(('boundary', '"%s"' % boundary)) 856 foundp = True 857 else: 858 newparams.append((pk, pv)) 859 if not foundp: 860 # The original Content-Type header had no boundary attribute. 861 # Tack one on the end. BAW: should we raise an exception 862 # instead??? 863 newparams.append(('boundary', '"%s"' % boundary)) 864 # Replace the existing Content-Type header with the new value 865 newheaders = [] 866 for h, v in self._headers: 867 if h.lower() == 'content-type': 868 parts = [] 869 for k, v in newparams: 870 if v == '': 871 parts.append(k) 872 else: 873 parts.append('%s=%s' % (k, v)) 874 val = SEMISPACE.join(parts) 875 newheaders.append(self.policy.header_store_parse(h, val)) 876 877 else: 878 newheaders.append((h, v)) 879 self._headers = newheaders 880 881 def get_content_charset(self, failobj=None): 882 """Return the charset parameter of the Content-Type header. 883 884 The returned string is always coerced to lower case. If there is no 885 Content-Type header, or if that header has no charset parameter, 886 failobj is returned. 887 """ 888 missing = object() 889 charset = self.get_param('charset', missing) 890 if charset is missing: 891 return failobj 892 if isinstance(charset, tuple): 893 # RFC 2231 encoded, so decode it, and it better end up as ascii. 894 pcharset = charset[0] or 'us-ascii' 895 try: 896 # LookupError will be raised if the charset isn't known to 897 # Python. UnicodeError will be raised if the encoded text 898 # contains a character not in the charset. 899 as_bytes = charset[2].encode('raw-unicode-escape') 900 charset = str(as_bytes, pcharset) 901 except (LookupError, UnicodeError): 902 charset = charset[2] 903 # charset characters must be in us-ascii range 904 try: 905 charset.encode('us-ascii') 906 except UnicodeError: 907 return failobj 908 # RFC 2046, $4.1.2 says charsets are not case sensitive 909 return charset.lower() 910 911 def get_charsets(self, failobj=None): 912 """Return a list containing the charset(s) used in this message. 913 914 The returned list of items describes the Content-Type headers' 915 charset parameter for this message and all the subparts in its 916 payload. 917 918 Each item will either be a string (the value of the charset parameter 919 in the Content-Type header of that part) or the value of the 920 'failobj' parameter (defaults to None), if the part does not have a 921 main MIME type of "text", or the charset is not defined. 922 923 The list will contain one string for each part of the message, plus 924 one for the container message (i.e. self), so that a non-multipart 925 message will still return a list of length 1. 926 """ 927 return [part.get_content_charset(failobj) for part in self.walk()] 928 929 def get_content_disposition(self): 930 """Return the message's content-disposition if it exists, or None. 931 932 The return values can be either 'inline', 'attachment' or None 933 according to the rfc2183. 934 """ 935 value = self.get('content-disposition') 936 if value is None: 937 return None 938 c_d = _splitparam(value)[0].lower() 939 return c_d 940 941 # I.e. def walk(self): ... 942 from email.iterators import walk 943 944 945class MIMEPart(Message): 946 947 def __init__(self, policy=None): 948 if policy is None: 949 from email.policy import default 950 policy = default 951 Message.__init__(self, policy) 952 953 954 def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): 955 """Return the entire formatted message as a string. 956 957 Optional 'unixfrom', when true, means include the Unix From_ envelope 958 header. maxheaderlen is retained for backward compatibility with the 959 base Message class, but defaults to None, meaning that the policy value 960 for max_line_length controls the header maximum length. 'policy' is 961 passed to the Generator instance used to serialize the message; if it 962 is not specified the policy associated with the message instance is 963 used. 964 """ 965 policy = self.policy if policy is None else policy 966 if maxheaderlen is None: 967 maxheaderlen = policy.max_line_length 968 return super().as_string(maxheaderlen=maxheaderlen, policy=policy) 969 970 def __str__(self): 971 return self.as_string(policy=self.policy.clone(utf8=True)) 972 973 def is_attachment(self): 974 c_d = self.get('content-disposition') 975 return False if c_d is None else c_d.content_disposition == 'attachment' 976 977 def _find_body(self, part, preferencelist): 978 if part.is_attachment(): 979 return 980 maintype, subtype = part.get_content_type().split('/') 981 if maintype == 'text': 982 if subtype in preferencelist: 983 yield (preferencelist.index(subtype), part) 984 return 985 if maintype != 'multipart': 986 return 987 if subtype != 'related': 988 for subpart in part.iter_parts(): 989 yield from self._find_body(subpart, preferencelist) 990 return 991 if 'related' in preferencelist: 992 yield (preferencelist.index('related'), part) 993 candidate = None 994 start = part.get_param('start') 995 if start: 996 for subpart in part.iter_parts(): 997 if subpart['content-id'] == start: 998 candidate = subpart 999 break 1000 if candidate is None: 1001 subparts = part.get_payload() 1002 candidate = subparts[0] if subparts else None 1003 if candidate is not None: 1004 yield from self._find_body(candidate, preferencelist) 1005 1006 def get_body(self, preferencelist=('related', 'html', 'plain')): 1007 """Return best candidate mime part for display as 'body' of message. 1008 1009 Do a depth first search, starting with self, looking for the first part 1010 matching each of the items in preferencelist, and return the part 1011 corresponding to the first item that has a match, or None if no items 1012 have a match. If 'related' is not included in preferencelist, consider 1013 the root part of any multipart/related encountered as a candidate 1014 match. Ignore parts with 'Content-Disposition: attachment'. 1015 """ 1016 best_prio = len(preferencelist) 1017 body = None 1018 for prio, part in self._find_body(self, preferencelist): 1019 if prio < best_prio: 1020 best_prio = prio 1021 body = part 1022 if prio == 0: 1023 break 1024 return body 1025 1026 _body_types = {('text', 'plain'), 1027 ('text', 'html'), 1028 ('multipart', 'related'), 1029 ('multipart', 'alternative')} 1030 def iter_attachments(self): 1031 """Return an iterator over the non-main parts of a multipart. 1032 1033 Skip the first of each occurrence of text/plain, text/html, 1034 multipart/related, or multipart/alternative in the multipart (unless 1035 they have a 'Content-Disposition: attachment' header) and include all 1036 remaining subparts in the returned iterator. When applied to a 1037 multipart/related, return all parts except the root part. Return an 1038 empty iterator when applied to a multipart/alternative or a 1039 non-multipart. 1040 """ 1041 maintype, subtype = self.get_content_type().split('/') 1042 if maintype != 'multipart' or subtype == 'alternative': 1043 return 1044 payload = self.get_payload() 1045 # Certain malformed messages can have content type set to `multipart/*` 1046 # but still have single part body, in which case payload.copy() can 1047 # fail with AttributeError. 1048 try: 1049 parts = payload.copy() 1050 except AttributeError: 1051 # payload is not a list, it is most probably a string. 1052 return 1053 1054 if maintype == 'multipart' and subtype == 'related': 1055 # For related, we treat everything but the root as an attachment. 1056 # The root may be indicated by 'start'; if there's no start or we 1057 # can't find the named start, treat the first subpart as the root. 1058 start = self.get_param('start') 1059 if start: 1060 found = False 1061 attachments = [] 1062 for part in parts: 1063 if part.get('content-id') == start: 1064 found = True 1065 else: 1066 attachments.append(part) 1067 if found: 1068 yield from attachments 1069 return 1070 parts.pop(0) 1071 yield from parts 1072 return 1073 # Otherwise we more or less invert the remaining logic in get_body. 1074 # This only really works in edge cases (ex: non-text related or 1075 # alternatives) if the sending agent sets content-disposition. 1076 seen = [] # Only skip the first example of each candidate type. 1077 for part in parts: 1078 maintype, subtype = part.get_content_type().split('/') 1079 if ((maintype, subtype) in self._body_types and 1080 not part.is_attachment() and subtype not in seen): 1081 seen.append(subtype) 1082 continue 1083 yield part 1084 1085 def iter_parts(self): 1086 """Return an iterator over all immediate subparts of a multipart. 1087 1088 Return an empty iterator for a non-multipart. 1089 """ 1090 if self.get_content_maintype() == 'multipart': 1091 yield from self.get_payload() 1092 1093 def get_content(self, *args, content_manager=None, **kw): 1094 if content_manager is None: 1095 content_manager = self.policy.content_manager 1096 return content_manager.get_content(self, *args, **kw) 1097 1098 def set_content(self, *args, content_manager=None, **kw): 1099 if content_manager is None: 1100 content_manager = self.policy.content_manager 1101 content_manager.set_content(self, *args, **kw) 1102 1103 def _make_multipart(self, subtype, disallowed_subtypes, boundary): 1104 if self.get_content_maintype() == 'multipart': 1105 existing_subtype = self.get_content_subtype() 1106 disallowed_subtypes = disallowed_subtypes + (subtype,) 1107 if existing_subtype in disallowed_subtypes: 1108 raise ValueError("Cannot convert {} to {}".format( 1109 existing_subtype, subtype)) 1110 keep_headers = [] 1111 part_headers = [] 1112 for name, value in self._headers: 1113 if name.lower().startswith('content-'): 1114 part_headers.append((name, value)) 1115 else: 1116 keep_headers.append((name, value)) 1117 if part_headers: 1118 # There is existing content, move it to the first subpart. 1119 part = type(self)(policy=self.policy) 1120 part._headers = part_headers 1121 part._payload = self._payload 1122 self._payload = [part] 1123 else: 1124 self._payload = [] 1125 self._headers = keep_headers 1126 self['Content-Type'] = 'multipart/' + subtype 1127 if boundary is not None: 1128 self.set_param('boundary', boundary) 1129 1130 def make_related(self, boundary=None): 1131 self._make_multipart('related', ('alternative', 'mixed'), boundary) 1132 1133 def make_alternative(self, boundary=None): 1134 self._make_multipart('alternative', ('mixed',), boundary) 1135 1136 def make_mixed(self, boundary=None): 1137 self._make_multipart('mixed', (), boundary) 1138 1139 def _add_multipart(self, _subtype, *args, _disp=None, **kw): 1140 if (self.get_content_maintype() != 'multipart' or 1141 self.get_content_subtype() != _subtype): 1142 getattr(self, 'make_' + _subtype)() 1143 part = type(self)(policy=self.policy) 1144 part.set_content(*args, **kw) 1145 if _disp and 'content-disposition' not in part: 1146 part['Content-Disposition'] = _disp 1147 self.attach(part) 1148 1149 def add_related(self, *args, **kw): 1150 self._add_multipart('related', *args, _disp='inline', **kw) 1151 1152 def add_alternative(self, *args, **kw): 1153 self._add_multipart('alternative', *args, **kw) 1154 1155 def add_attachment(self, *args, **kw): 1156 self._add_multipart('mixed', *args, _disp='attachment', **kw) 1157 1158 def clear(self): 1159 self._headers = [] 1160 self._payload = None 1161 1162 def clear_content(self): 1163 self._headers = [(n, v) for n, v in self._headers 1164 if not n.lower().startswith('content-')] 1165 self._payload = None 1166 1167 1168class EmailMessage(MIMEPart): 1169 1170 def set_content(self, *args, **kw): 1171 super().set_content(*args, **kw) 1172 if 'MIME-Version' not in self: 1173 self['MIME-Version'] = '1.0' 1174