1# Copyright (C) 2001-2007 Python Software Foundation 2# Author: Barry Warsaw 3# Contact: email-sig@python.org 4 5"""Basic message object for the email package object model.""" 6 7__all__ = ['Message', 'EmailMessage'] 8 9import binascii 10import re 11import quopri 12from io import BytesIO, StringIO 13 14# Intrapackage imports 15from email import utils 16from email import errors 17from email._policybase import compat32 18from email import charset as _charset 19from email._encoded_words import decode_b 20Charset = _charset.Charset 21 22SEMISPACE = '; ' 23 24# Regular expression that matches `special' characters in parameters, the 25# existence of which force quoting of the parameter value. 26tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') 27 28 29def _splitparam(param): 30 # Split header parameters. BAW: this may be too simple. It isn't 31 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers 32 # found in the wild. We may eventually need a full fledged parser. 33 # RDM: we might have a Header here; for now just stringify it. 34 a, sep, b = str(param).partition(';') 35 if not sep: 36 return a.strip(), None 37 return a.strip(), b.strip() 38 39def _formatparam(param, value=None, quote=True): 40 """Convenience function to format and return a key=value pair. 41 42 This will quote the value if needed or if quote is true. If value is a 43 three tuple (charset, language, value), it will be encoded according 44 to RFC2231 rules. If it contains non-ascii characters it will likewise 45 be encoded according to RFC2231 rules, using the utf-8 charset and 46 a null language. 47 """ 48 if value is not None and len(value) > 0: 49 # A tuple is used for RFC 2231 encoded parameter values where items 50 # are (charset, language, value). charset is a string, not a Charset 51 # instance. RFC 2231 encoded values are never quoted, per RFC. 52 if isinstance(value, tuple): 53 # Encode as per RFC 2231 54 param += '*' 55 value = utils.encode_rfc2231(value[2], value[0], value[1]) 56 return '%s=%s' % (param, value) 57 else: 58 try: 59 value.encode('ascii') 60 except UnicodeEncodeError: 61 param += '*' 62 value = utils.encode_rfc2231(value, 'utf-8', '') 63 return '%s=%s' % (param, value) 64 # BAW: Please check this. I think that if quote is set it should 65 # force quoting even if not necessary. 66 if quote or tspecials.search(value): 67 return '%s="%s"' % (param, utils.quote(value)) 68 else: 69 return '%s=%s' % (param, value) 70 else: 71 return param 72 73def _parseparam(s): 74 # RDM This might be a Header, so for now stringify it. 75 s = ';' + str(s) 76 plist = [] 77 while s[:1] == ';': 78 s = s[1:] 79 end = s.find(';') 80 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: 81 end = s.find(';', end + 1) 82 if end < 0: 83 end = len(s) 84 f = s[:end] 85 if '=' in f: 86 i = f.index('=') 87 f = f[:i].strip().lower() + '=' + f[i+1:].strip() 88 plist.append(f.strip()) 89 s = s[end:] 90 return plist 91 92 93def _unquotevalue(value): 94 # This is different than utils.collapse_rfc2231_value() because it doesn't 95 # try to convert the value to a unicode. Message.get_param() and 96 # Message.get_params() are both currently defined to return the tuple in 97 # the face of RFC 2231 parameters. 98 if isinstance(value, tuple): 99 return value[0], value[1], utils.unquote(value[2]) 100 else: 101 return utils.unquote(value) 102 103 104def _decode_uu(encoded): 105 """Decode uuencoded data.""" 106 decoded_lines = [] 107 encoded_lines_iter = iter(encoded.splitlines()) 108 for line in encoded_lines_iter: 109 if line.startswith(b"begin "): 110 mode, _, path = line.removeprefix(b"begin ").partition(b" ") 111 try: 112 int(mode, base=8) 113 except ValueError: 114 continue 115 else: 116 break 117 else: 118 raise ValueError("`begin` line not found") 119 for line in encoded_lines_iter: 120 if not line: 121 raise ValueError("Truncated input") 122 elif line.strip(b' \t\r\n\f') == b'end': 123 break 124 try: 125 decoded_line = binascii.a2b_uu(line) 126 except binascii.Error: 127 # Workaround for broken uuencoders by /Fredrik Lundh 128 nbytes = (((line[0]-32) & 63) * 4 + 5) // 3 129 decoded_line = binascii.a2b_uu(line[:nbytes]) 130 decoded_lines.append(decoded_line) 131 132 return b''.join(decoded_lines) 133 134 135class Message: 136 """Basic message object. 137 138 A message object is defined as something that has a bunch of RFC 2822 139 headers and a payload. It may optionally have an envelope header 140 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a 141 multipart or a message/rfc822), then the payload is a list of Message 142 objects, otherwise it is a string. 143 144 Message objects implement part of the `mapping' interface, which assumes 145 there is exactly one occurrence of the header per message. Some headers 146 do in fact appear multiple times (e.g. Received) and for those headers, 147 you must use the explicit API to set or get all the headers. Not all of 148 the mapping methods are implemented. 149 """ 150 def __init__(self, policy=compat32): 151 self.policy = policy 152 self._headers = [] 153 self._unixfrom = None 154 self._payload = None 155 self._charset = None 156 # Defaults for multipart messages 157 self.preamble = self.epilogue = None 158 self.defects = [] 159 # Default content type 160 self._default_type = 'text/plain' 161 162 def __str__(self): 163 """Return the entire formatted message as a string. 164 """ 165 return self.as_string() 166 167 def as_string(self, unixfrom=False, maxheaderlen=0, policy=None): 168 """Return the entire formatted message as a string. 169 170 Optional 'unixfrom', when true, means include the Unix From_ envelope 171 header. For backward compatibility reasons, if maxheaderlen is 172 not specified it defaults to 0, so you must override it explicitly 173 if you want a different maxheaderlen. 'policy' is passed to the 174 Generator instance used to serialize the message; if it is not 175 specified the policy associated with the message instance is used. 176 177 If the message object contains binary data that is not encoded 178 according to RFC standards, the non-compliant data will be replaced by 179 unicode "unknown character" code points. 180 """ 181 from email.generator import Generator 182 policy = self.policy if policy is None else policy 183 fp = StringIO() 184 g = Generator(fp, 185 mangle_from_=False, 186 maxheaderlen=maxheaderlen, 187 policy=policy) 188 g.flatten(self, unixfrom=unixfrom) 189 return fp.getvalue() 190 191 def __bytes__(self): 192 """Return the entire formatted message as a bytes object. 193 """ 194 return self.as_bytes() 195 196 def as_bytes(self, unixfrom=False, policy=None): 197 """Return the entire formatted message as a bytes object. 198 199 Optional 'unixfrom', when true, means include the Unix From_ envelope 200 header. 'policy' is passed to the BytesGenerator instance used to 201 serialize the message; if not specified the policy associated with 202 the message instance is used. 203 """ 204 from email.generator import BytesGenerator 205 policy = self.policy if policy is None else policy 206 fp = BytesIO() 207 g = BytesGenerator(fp, mangle_from_=False, policy=policy) 208 g.flatten(self, unixfrom=unixfrom) 209 return fp.getvalue() 210 211 def is_multipart(self): 212 """Return True if the message consists of multiple parts.""" 213 return isinstance(self._payload, list) 214 215 # 216 # Unix From_ line 217 # 218 def set_unixfrom(self, unixfrom): 219 self._unixfrom = unixfrom 220 221 def get_unixfrom(self): 222 return self._unixfrom 223 224 # 225 # Payload manipulation. 226 # 227 def attach(self, payload): 228 """Add the given payload to the current payload. 229 230 The current payload will always be a list of objects after this method 231 is called. If you want to set the payload to a scalar object, use 232 set_payload() instead. 233 """ 234 if self._payload is None: 235 self._payload = [payload] 236 else: 237 try: 238 self._payload.append(payload) 239 except AttributeError: 240 raise TypeError("Attach is not valid on a message with a" 241 " non-multipart payload") 242 243 def get_payload(self, i=None, decode=False): 244 """Return a reference to the payload. 245 246 The payload will either be a list object or a string. If you mutate 247 the list object, you modify the message's payload in place. Optional 248 i returns that index into the payload. 249 250 Optional decode is a flag indicating whether the payload should be 251 decoded or not, according to the Content-Transfer-Encoding header 252 (default is False). 253 254 When True and the message is not a multipart, the payload will be 255 decoded if this header's value is `quoted-printable' or `base64'. If 256 some other encoding is used, or the header is missing, or if the 257 payload has bogus data (i.e. bogus base64 or uuencoded data), the 258 payload is returned as-is. 259 260 If the message is a multipart and the decode flag is True, then None 261 is returned. 262 """ 263 # Here is the logic table for this code, based on the email5.0.0 code: 264 # i decode is_multipart result 265 # ------ ------ ------------ ------------------------------ 266 # None True True None 267 # i True True None 268 # None False True _payload (a list) 269 # i False True _payload element i (a Message) 270 # i False False error (not a list) 271 # i True False error (not a list) 272 # None False False _payload 273 # None True False _payload decoded (bytes) 274 # Note that Barry planned to factor out the 'decode' case, but that 275 # isn't so easy now that we handle the 8 bit data, which needs to be 276 # converted in both the decode and non-decode path. 277 if self.is_multipart(): 278 if decode: 279 return None 280 if i is None: 281 return self._payload 282 else: 283 return self._payload[i] 284 # For backward compatibility, Use isinstance and this error message 285 # instead of the more logical is_multipart test. 286 if i is not None and not isinstance(self._payload, list): 287 raise TypeError('Expected list, got %s' % type(self._payload)) 288 payload = self._payload 289 # cte might be a Header, so for now stringify it. 290 cte = str(self.get('content-transfer-encoding', '')).lower() 291 # payload may be bytes here. 292 if not decode: 293 if isinstance(payload, str) and utils._has_surrogates(payload): 294 try: 295 bpayload = payload.encode('ascii', 'surrogateescape') 296 try: 297 payload = bpayload.decode(self.get_content_charset('ascii'), 'replace') 298 except LookupError: 299 payload = bpayload.decode('ascii', 'replace') 300 except UnicodeEncodeError: 301 pass 302 return payload 303 if isinstance(payload, str): 304 try: 305 bpayload = payload.encode('ascii', 'surrogateescape') 306 except UnicodeEncodeError: 307 # This won't happen for RFC compliant messages (messages 308 # containing only ASCII code points in the unicode input). 309 # If it does happen, turn the string into bytes in a way 310 # guaranteed not to fail. 311 bpayload = payload.encode('raw-unicode-escape') 312 if cte == 'quoted-printable': 313 return quopri.decodestring(bpayload) 314 elif cte == 'base64': 315 # XXX: this is a bit of a hack; decode_b should probably be factored 316 # out somewhere, but I haven't figured out where yet. 317 value, defects = decode_b(b''.join(bpayload.splitlines())) 318 for defect in defects: 319 self.policy.handle_defect(self, defect) 320 return value 321 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): 322 try: 323 return _decode_uu(bpayload) 324 except ValueError: 325 # Some decoding problem. 326 return bpayload 327 if isinstance(payload, str): 328 return bpayload 329 return payload 330 331 def set_payload(self, payload, charset=None): 332 """Set the payload to the given value. 333 334 Optional charset sets the message's default character set. See 335 set_charset() for details. 336 """ 337 if hasattr(payload, 'encode'): 338 if charset is None: 339 self._payload = payload 340 return 341 if not isinstance(charset, Charset): 342 charset = Charset(charset) 343 payload = payload.encode(charset.output_charset, 'surrogateescape') 344 if hasattr(payload, 'decode'): 345 self._payload = payload.decode('ascii', 'surrogateescape') 346 else: 347 self._payload = payload 348 if charset is not None: 349 self.set_charset(charset) 350 351 def set_charset(self, charset): 352 """Set the charset of the payload to a given character set. 353 354 charset can be a Charset instance, a string naming a character set, or 355 None. If it is a string it will be converted to a Charset instance. 356 If charset is None, the charset parameter will be removed from the 357 Content-Type field. Anything else will generate a TypeError. 358 359 The message will be assumed to be of type text/* encoded with 360 charset.input_charset. It will be converted to charset.output_charset 361 and encoded properly, if needed, when generating the plain text 362 representation of the message. MIME headers (MIME-Version, 363 Content-Type, Content-Transfer-Encoding) will be added as needed. 364 """ 365 if charset is None: 366 self.del_param('charset') 367 self._charset = None 368 return 369 if not isinstance(charset, Charset): 370 charset = Charset(charset) 371 self._charset = charset 372 if 'MIME-Version' not in self: 373 self.add_header('MIME-Version', '1.0') 374 if 'Content-Type' not in self: 375 self.add_header('Content-Type', 'text/plain', 376 charset=charset.get_output_charset()) 377 else: 378 self.set_param('charset', charset.get_output_charset()) 379 if charset != charset.get_output_charset(): 380 self._payload = charset.body_encode(self._payload) 381 if 'Content-Transfer-Encoding' not in self: 382 cte = charset.get_body_encoding() 383 try: 384 cte(self) 385 except TypeError: 386 # This 'if' is for backward compatibility, it allows unicode 387 # through even though that won't work correctly if the 388 # message is serialized. 389 payload = self._payload 390 if payload: 391 try: 392 payload = payload.encode('ascii', 'surrogateescape') 393 except UnicodeError: 394 payload = payload.encode(charset.output_charset) 395 self._payload = charset.body_encode(payload) 396 self.add_header('Content-Transfer-Encoding', cte) 397 398 def get_charset(self): 399 """Return the Charset instance associated with the message's payload. 400 """ 401 return self._charset 402 403 # 404 # MAPPING INTERFACE (partial) 405 # 406 def __len__(self): 407 """Return the total number of headers, including duplicates.""" 408 return len(self._headers) 409 410 def __getitem__(self, name): 411 """Get a header value. 412 413 Return None if the header is missing instead of raising an exception. 414 415 Note that if the header appeared multiple times, exactly which 416 occurrence gets returned is undefined. Use get_all() to get all 417 the values matching a header field name. 418 """ 419 return self.get(name) 420 421 def __setitem__(self, name, val): 422 """Set the value of a header. 423 424 Note: this does not overwrite an existing header with the same field 425 name. Use __delitem__() first to delete any existing headers. 426 """ 427 max_count = self.policy.header_max_count(name) 428 if max_count: 429 lname = name.lower() 430 found = 0 431 for k, v in self._headers: 432 if k.lower() == lname: 433 found += 1 434 if found >= max_count: 435 raise ValueError("There may be at most {} {} headers " 436 "in a message".format(max_count, name)) 437 self._headers.append(self.policy.header_store_parse(name, val)) 438 439 def __delitem__(self, name): 440 """Delete all occurrences of a header, if present. 441 442 Does not raise an exception if the header is missing. 443 """ 444 name = name.lower() 445 newheaders = [] 446 for k, v in self._headers: 447 if k.lower() != name: 448 newheaders.append((k, v)) 449 self._headers = newheaders 450 451 def __contains__(self, name): 452 name_lower = name.lower() 453 for k, v in self._headers: 454 if name_lower == k.lower(): 455 return True 456 return False 457 458 def __iter__(self): 459 for field, value in self._headers: 460 yield field 461 462 def keys(self): 463 """Return a list of all the message's header field names. 464 465 These will be sorted in the order they appeared in the original 466 message, or were added to the message, and may contain duplicates. 467 Any fields deleted and re-inserted are always appended to the header 468 list. 469 """ 470 return [k for k, v in self._headers] 471 472 def values(self): 473 """Return a list of all the message's header values. 474 475 These will be sorted in the order they appeared in the original 476 message, or were added to the message, and may contain duplicates. 477 Any fields deleted and re-inserted are always appended to the header 478 list. 479 """ 480 return [self.policy.header_fetch_parse(k, v) 481 for k, v in self._headers] 482 483 def items(self): 484 """Get all the message's header fields and values. 485 486 These will be sorted in the order they appeared in the original 487 message, or were added to the message, and may contain duplicates. 488 Any fields deleted and re-inserted are always appended to the header 489 list. 490 """ 491 return [(k, self.policy.header_fetch_parse(k, v)) 492 for k, v in self._headers] 493 494 def get(self, name, failobj=None): 495 """Get a header value. 496 497 Like __getitem__() but return failobj instead of None when the field 498 is missing. 499 """ 500 name = name.lower() 501 for k, v in self._headers: 502 if k.lower() == name: 503 return self.policy.header_fetch_parse(k, v) 504 return failobj 505 506 # 507 # "Internal" methods (public API, but only intended for use by a parser 508 # or generator, not normal application code. 509 # 510 511 def set_raw(self, name, value): 512 """Store name and value in the model without modification. 513 514 This is an "internal" API, intended only for use by a parser. 515 """ 516 self._headers.append((name, value)) 517 518 def raw_items(self): 519 """Return the (name, value) header pairs without modification. 520 521 This is an "internal" API, intended only for use by a generator. 522 """ 523 return iter(self._headers.copy()) 524 525 # 526 # Additional useful stuff 527 # 528 529 def get_all(self, name, failobj=None): 530 """Return a list of all the values for the named field. 531 532 These will be sorted in the order they appeared in the original 533 message, and may contain duplicates. Any fields deleted and 534 re-inserted are always appended to the header list. 535 536 If no such fields exist, failobj is returned (defaults to None). 537 """ 538 values = [] 539 name = name.lower() 540 for k, v in self._headers: 541 if k.lower() == name: 542 values.append(self.policy.header_fetch_parse(k, v)) 543 if not values: 544 return failobj 545 return values 546 547 def add_header(self, _name, _value, **_params): 548 """Extended header setting. 549 550 name is the header field to add. keyword arguments can be used to set 551 additional parameters for the header field, with underscores converted 552 to dashes. Normally the parameter will be added as key="value" unless 553 value is None, in which case only the key will be added. If a 554 parameter value contains non-ASCII characters it can be specified as a 555 three-tuple of (charset, language, value), in which case it will be 556 encoded according to RFC2231 rules. Otherwise it will be encoded using 557 the utf-8 charset and a language of ''. 558 559 Examples: 560 561 msg.add_header('content-disposition', 'attachment', filename='bud.gif') 562 msg.add_header('content-disposition', 'attachment', 563 filename=('utf-8', '', Fußballer.ppt')) 564 msg.add_header('content-disposition', 'attachment', 565 filename='Fußballer.ppt')) 566 """ 567 parts = [] 568 for k, v in _params.items(): 569 if v is None: 570 parts.append(k.replace('_', '-')) 571 else: 572 parts.append(_formatparam(k.replace('_', '-'), v)) 573 if _value is not None: 574 parts.insert(0, _value) 575 self[_name] = SEMISPACE.join(parts) 576 577 def replace_header(self, _name, _value): 578 """Replace a header. 579 580 Replace the first matching header found in the message, retaining 581 header order and case. If no matching header was found, a KeyError is 582 raised. 583 """ 584 _name = _name.lower() 585 for i, (k, v) in zip(range(len(self._headers)), self._headers): 586 if k.lower() == _name: 587 self._headers[i] = self.policy.header_store_parse(k, _value) 588 break 589 else: 590 raise KeyError(_name) 591 592 # 593 # Use these three methods instead of the three above. 594 # 595 596 def get_content_type(self): 597 """Return the message's content type. 598 599 The returned string is coerced to lower case of the form 600 `maintype/subtype'. If there was no Content-Type header in the 601 message, the default type as given by get_default_type() will be 602 returned. Since according to RFC 2045, messages always have a default 603 type this will always return a value. 604 605 RFC 2045 defines a message's default type to be text/plain unless it 606 appears inside a multipart/digest container, in which case it would be 607 message/rfc822. 608 """ 609 missing = object() 610 value = self.get('content-type', missing) 611 if value is missing: 612 # This should have no parameters 613 return self.get_default_type() 614 ctype = _splitparam(value)[0].lower() 615 # RFC 2045, section 5.2 says if its invalid, use text/plain 616 if ctype.count('/') != 1: 617 return 'text/plain' 618 return ctype 619 620 def get_content_maintype(self): 621 """Return the message's main content type. 622 623 This is the `maintype' part of the string returned by 624 get_content_type(). 625 """ 626 ctype = self.get_content_type() 627 return ctype.split('/')[0] 628 629 def get_content_subtype(self): 630 """Returns the message's sub-content type. 631 632 This is the `subtype' part of the string returned by 633 get_content_type(). 634 """ 635 ctype = self.get_content_type() 636 return ctype.split('/')[1] 637 638 def get_default_type(self): 639 """Return the `default' content type. 640 641 Most messages have a default content type of text/plain, except for 642 messages that are subparts of multipart/digest containers. Such 643 subparts have a default content type of message/rfc822. 644 """ 645 return self._default_type 646 647 def set_default_type(self, ctype): 648 """Set the `default' content type. 649 650 ctype should be either "text/plain" or "message/rfc822", although this 651 is not enforced. The default content type is not stored in the 652 Content-Type header. 653 """ 654 self._default_type = ctype 655 656 def _get_params_preserve(self, failobj, header): 657 # Like get_params() but preserves the quoting of values. BAW: 658 # should this be part of the public interface? 659 missing = object() 660 value = self.get(header, missing) 661 if value is missing: 662 return failobj 663 params = [] 664 for p in _parseparam(value): 665 try: 666 name, val = p.split('=', 1) 667 name = name.strip() 668 val = val.strip() 669 except ValueError: 670 # Must have been a bare attribute 671 name = p.strip() 672 val = '' 673 params.append((name, val)) 674 params = utils.decode_params(params) 675 return params 676 677 def get_params(self, failobj=None, header='content-type', unquote=True): 678 """Return the message's Content-Type parameters, as a list. 679 680 The elements of the returned list are 2-tuples of key/value pairs, as 681 split on the `=' sign. The left hand side of the `=' is the key, 682 while the right hand side is the value. If there is no `=' sign in 683 the parameter the value is the empty string. The value is as 684 described in the get_param() method. 685 686 Optional failobj is the object to return if there is no Content-Type 687 header. Optional header is the header to search instead of 688 Content-Type. If unquote is True, the value is unquoted. 689 """ 690 missing = object() 691 params = self._get_params_preserve(missing, header) 692 if params is missing: 693 return failobj 694 if unquote: 695 return [(k, _unquotevalue(v)) for k, v in params] 696 else: 697 return params 698 699 def get_param(self, param, failobj=None, header='content-type', 700 unquote=True): 701 """Return the parameter value if found in the Content-Type header. 702 703 Optional failobj is the object to return if there is no Content-Type 704 header, or the Content-Type header has no such parameter. Optional 705 header is the header to search instead of Content-Type. 706 707 Parameter keys are always compared case insensitively. The return 708 value can either be a string, or a 3-tuple if the parameter was RFC 709 2231 encoded. When it's a 3-tuple, the elements of the value are of 710 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and 711 LANGUAGE can be None, in which case you should consider VALUE to be 712 encoded in the us-ascii charset. You can usually ignore LANGUAGE. 713 The parameter value (either the returned string, or the VALUE item in 714 the 3-tuple) is always unquoted, unless unquote is set to False. 715 716 If your application doesn't care whether the parameter was RFC 2231 717 encoded, it can turn the return value into a string as follows: 718 719 rawparam = msg.get_param('foo') 720 param = email.utils.collapse_rfc2231_value(rawparam) 721 722 """ 723 if header not in self: 724 return failobj 725 for k, v in self._get_params_preserve(failobj, header): 726 if k.lower() == param.lower(): 727 if unquote: 728 return _unquotevalue(v) 729 else: 730 return v 731 return failobj 732 733 def set_param(self, param, value, header='Content-Type', requote=True, 734 charset=None, language='', replace=False): 735 """Set a parameter in the Content-Type header. 736 737 If the parameter already exists in the header, its value will be 738 replaced with the new value. 739 740 If header is Content-Type and has not yet been defined for this 741 message, it will be set to "text/plain" and the new parameter and 742 value will be appended as per RFC 2045. 743 744 An alternate header can be specified in the header argument, and all 745 parameters will be quoted as necessary unless requote is False. 746 747 If charset is specified, the parameter will be encoded according to RFC 748 2231. Optional language specifies the RFC 2231 language, defaulting 749 to the empty string. Both charset and language should be strings. 750 """ 751 if not isinstance(value, tuple) and charset: 752 value = (charset, language, value) 753 754 if header not in self and header.lower() == 'content-type': 755 ctype = 'text/plain' 756 else: 757 ctype = self.get(header) 758 if not self.get_param(param, header=header): 759 if not ctype: 760 ctype = _formatparam(param, value, requote) 761 else: 762 ctype = SEMISPACE.join( 763 [ctype, _formatparam(param, value, requote)]) 764 else: 765 ctype = '' 766 for old_param, old_value in self.get_params(header=header, 767 unquote=requote): 768 append_param = '' 769 if old_param.lower() == param.lower(): 770 append_param = _formatparam(param, value, requote) 771 else: 772 append_param = _formatparam(old_param, old_value, requote) 773 if not ctype: 774 ctype = append_param 775 else: 776 ctype = SEMISPACE.join([ctype, append_param]) 777 if ctype != self.get(header): 778 if replace: 779 self.replace_header(header, ctype) 780 else: 781 del self[header] 782 self[header] = ctype 783 784 def del_param(self, param, header='content-type', requote=True): 785 """Remove the given parameter completely from the Content-Type header. 786 787 The header will be re-written in place without the parameter or its 788 value. All values will be quoted as necessary unless requote is 789 False. Optional header specifies an alternative to the Content-Type 790 header. 791 """ 792 if header not in self: 793 return 794 new_ctype = '' 795 for p, v in self.get_params(header=header, unquote=requote): 796 if p.lower() != param.lower(): 797 if not new_ctype: 798 new_ctype = _formatparam(p, v, requote) 799 else: 800 new_ctype = SEMISPACE.join([new_ctype, 801 _formatparam(p, v, requote)]) 802 if new_ctype != self.get(header): 803 del self[header] 804 self[header] = new_ctype 805 806 def set_type(self, type, header='Content-Type', requote=True): 807 """Set the main type and subtype for the Content-Type header. 808 809 type must be a string in the form "maintype/subtype", otherwise a 810 ValueError is raised. 811 812 This method replaces the Content-Type header, keeping all the 813 parameters in place. If requote is False, this leaves the existing 814 header's quoting as is. Otherwise, the parameters will be quoted (the 815 default). 816 817 An alternative header can be specified in the header argument. When 818 the Content-Type header is set, we'll always also add a MIME-Version 819 header. 820 """ 821 # BAW: should we be strict? 822 if not type.count('/') == 1: 823 raise ValueError 824 # Set the Content-Type, you get a MIME-Version 825 if header.lower() == 'content-type': 826 del self['mime-version'] 827 self['MIME-Version'] = '1.0' 828 if header not in self: 829 self[header] = type 830 return 831 params = self.get_params(header=header, unquote=requote) 832 del self[header] 833 self[header] = type 834 # Skip the first param; it's the old type. 835 for p, v in params[1:]: 836 self.set_param(p, v, header, requote) 837 838 def get_filename(self, failobj=None): 839 """Return the filename associated with the payload if present. 840 841 The filename is extracted from the Content-Disposition header's 842 `filename' parameter, and it is unquoted. If that header is missing 843 the `filename' parameter, this method falls back to looking for the 844 `name' parameter. 845 """ 846 missing = object() 847 filename = self.get_param('filename', missing, 'content-disposition') 848 if filename is missing: 849 filename = self.get_param('name', missing, 'content-type') 850 if filename is missing: 851 return failobj 852 return utils.collapse_rfc2231_value(filename).strip() 853 854 def get_boundary(self, failobj=None): 855 """Return the boundary associated with the payload if present. 856 857 The boundary is extracted from the Content-Type header's `boundary' 858 parameter, and it is unquoted. 859 """ 860 missing = object() 861 boundary = self.get_param('boundary', missing) 862 if boundary is missing: 863 return failobj 864 # RFC 2046 says that boundaries may begin but not end in w/s 865 return utils.collapse_rfc2231_value(boundary).rstrip() 866 867 def set_boundary(self, boundary): 868 """Set the boundary parameter in Content-Type to 'boundary'. 869 870 This is subtly different than deleting the Content-Type header and 871 adding a new one with a new boundary parameter via add_header(). The 872 main difference is that using the set_boundary() method preserves the 873 order of the Content-Type header in the original message. 874 875 HeaderParseError is raised if the message has no Content-Type header. 876 """ 877 missing = object() 878 params = self._get_params_preserve(missing, 'content-type') 879 if params is missing: 880 # There was no Content-Type header, and we don't know what type 881 # to set it to, so raise an exception. 882 raise errors.HeaderParseError('No Content-Type header found') 883 newparams = [] 884 foundp = False 885 for pk, pv in params: 886 if pk.lower() == 'boundary': 887 newparams.append(('boundary', '"%s"' % boundary)) 888 foundp = True 889 else: 890 newparams.append((pk, pv)) 891 if not foundp: 892 # The original Content-Type header had no boundary attribute. 893 # Tack one on the end. BAW: should we raise an exception 894 # instead??? 895 newparams.append(('boundary', '"%s"' % boundary)) 896 # Replace the existing Content-Type header with the new value 897 newheaders = [] 898 for h, v in self._headers: 899 if h.lower() == 'content-type': 900 parts = [] 901 for k, v in newparams: 902 if v == '': 903 parts.append(k) 904 else: 905 parts.append('%s=%s' % (k, v)) 906 val = SEMISPACE.join(parts) 907 newheaders.append(self.policy.header_store_parse(h, val)) 908 909 else: 910 newheaders.append((h, v)) 911 self._headers = newheaders 912 913 def get_content_charset(self, failobj=None): 914 """Return the charset parameter of the Content-Type header. 915 916 The returned string is always coerced to lower case. If there is no 917 Content-Type header, or if that header has no charset parameter, 918 failobj is returned. 919 """ 920 missing = object() 921 charset = self.get_param('charset', missing) 922 if charset is missing: 923 return failobj 924 if isinstance(charset, tuple): 925 # RFC 2231 encoded, so decode it, and it better end up as ascii. 926 pcharset = charset[0] or 'us-ascii' 927 try: 928 # LookupError will be raised if the charset isn't known to 929 # Python. UnicodeError will be raised if the encoded text 930 # contains a character not in the charset. 931 as_bytes = charset[2].encode('raw-unicode-escape') 932 charset = str(as_bytes, pcharset) 933 except (LookupError, UnicodeError): 934 charset = charset[2] 935 # charset characters must be in us-ascii range 936 try: 937 charset.encode('us-ascii') 938 except UnicodeError: 939 return failobj 940 # RFC 2046, $4.1.2 says charsets are not case sensitive 941 return charset.lower() 942 943 def get_charsets(self, failobj=None): 944 """Return a list containing the charset(s) used in this message. 945 946 The returned list of items describes the Content-Type headers' 947 charset parameter for this message and all the subparts in its 948 payload. 949 950 Each item will either be a string (the value of the charset parameter 951 in the Content-Type header of that part) or the value of the 952 'failobj' parameter (defaults to None), if the part does not have a 953 main MIME type of "text", or the charset is not defined. 954 955 The list will contain one string for each part of the message, plus 956 one for the container message (i.e. self), so that a non-multipart 957 message will still return a list of length 1. 958 """ 959 return [part.get_content_charset(failobj) for part in self.walk()] 960 961 def get_content_disposition(self): 962 """Return the message's content-disposition if it exists, or None. 963 964 The return values can be either 'inline', 'attachment' or None 965 according to the rfc2183. 966 """ 967 value = self.get('content-disposition') 968 if value is None: 969 return None 970 c_d = _splitparam(value)[0].lower() 971 return c_d 972 973 # I.e. def walk(self): ... 974 from email.iterators import walk 975 976 977class MIMEPart(Message): 978 979 def __init__(self, policy=None): 980 if policy is None: 981 from email.policy import default 982 policy = default 983 super().__init__(policy) 984 985 986 def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): 987 """Return the entire formatted message as a string. 988 989 Optional 'unixfrom', when true, means include the Unix From_ envelope 990 header. maxheaderlen is retained for backward compatibility with the 991 base Message class, but defaults to None, meaning that the policy value 992 for max_line_length controls the header maximum length. 'policy' is 993 passed to the Generator instance used to serialize the message; if it 994 is not specified the policy associated with the message instance is 995 used. 996 """ 997 policy = self.policy if policy is None else policy 998 if maxheaderlen is None: 999 maxheaderlen = policy.max_line_length 1000 return super().as_string(unixfrom, maxheaderlen, policy) 1001 1002 def __str__(self): 1003 return self.as_string(policy=self.policy.clone(utf8=True)) 1004 1005 def is_attachment(self): 1006 c_d = self.get('content-disposition') 1007 return False if c_d is None else c_d.content_disposition == 'attachment' 1008 1009 def _find_body(self, part, preferencelist): 1010 if part.is_attachment(): 1011 return 1012 maintype, subtype = part.get_content_type().split('/') 1013 if maintype == 'text': 1014 if subtype in preferencelist: 1015 yield (preferencelist.index(subtype), part) 1016 return 1017 if maintype != 'multipart' or not self.is_multipart(): 1018 return 1019 if subtype != 'related': 1020 for subpart in part.iter_parts(): 1021 yield from self._find_body(subpart, preferencelist) 1022 return 1023 if 'related' in preferencelist: 1024 yield (preferencelist.index('related'), part) 1025 candidate = None 1026 start = part.get_param('start') 1027 if start: 1028 for subpart in part.iter_parts(): 1029 if subpart['content-id'] == start: 1030 candidate = subpart 1031 break 1032 if candidate is None: 1033 subparts = part.get_payload() 1034 candidate = subparts[0] if subparts else None 1035 if candidate is not None: 1036 yield from self._find_body(candidate, preferencelist) 1037 1038 def get_body(self, preferencelist=('related', 'html', 'plain')): 1039 """Return best candidate mime part for display as 'body' of message. 1040 1041 Do a depth first search, starting with self, looking for the first part 1042 matching each of the items in preferencelist, and return the part 1043 corresponding to the first item that has a match, or None if no items 1044 have a match. If 'related' is not included in preferencelist, consider 1045 the root part of any multipart/related encountered as a candidate 1046 match. Ignore parts with 'Content-Disposition: attachment'. 1047 """ 1048 best_prio = len(preferencelist) 1049 body = None 1050 for prio, part in self._find_body(self, preferencelist): 1051 if prio < best_prio: 1052 best_prio = prio 1053 body = part 1054 if prio == 0: 1055 break 1056 return body 1057 1058 _body_types = {('text', 'plain'), 1059 ('text', 'html'), 1060 ('multipart', 'related'), 1061 ('multipart', 'alternative')} 1062 def iter_attachments(self): 1063 """Return an iterator over the non-main parts of a multipart. 1064 1065 Skip the first of each occurrence of text/plain, text/html, 1066 multipart/related, or multipart/alternative in the multipart (unless 1067 they have a 'Content-Disposition: attachment' header) and include all 1068 remaining subparts in the returned iterator. When applied to a 1069 multipart/related, return all parts except the root part. Return an 1070 empty iterator when applied to a multipart/alternative or a 1071 non-multipart. 1072 """ 1073 maintype, subtype = self.get_content_type().split('/') 1074 if maintype != 'multipart' or subtype == 'alternative': 1075 return 1076 payload = self.get_payload() 1077 # Certain malformed messages can have content type set to `multipart/*` 1078 # but still have single part body, in which case payload.copy() can 1079 # fail with AttributeError. 1080 try: 1081 parts = payload.copy() 1082 except AttributeError: 1083 # payload is not a list, it is most probably a string. 1084 return 1085 1086 if maintype == 'multipart' and subtype == 'related': 1087 # For related, we treat everything but the root as an attachment. 1088 # The root may be indicated by 'start'; if there's no start or we 1089 # can't find the named start, treat the first subpart as the root. 1090 start = self.get_param('start') 1091 if start: 1092 found = False 1093 attachments = [] 1094 for part in parts: 1095 if part.get('content-id') == start: 1096 found = True 1097 else: 1098 attachments.append(part) 1099 if found: 1100 yield from attachments 1101 return 1102 parts.pop(0) 1103 yield from parts 1104 return 1105 # Otherwise we more or less invert the remaining logic in get_body. 1106 # This only really works in edge cases (ex: non-text related or 1107 # alternatives) if the sending agent sets content-disposition. 1108 seen = [] # Only skip the first example of each candidate type. 1109 for part in parts: 1110 maintype, subtype = part.get_content_type().split('/') 1111 if ((maintype, subtype) in self._body_types and 1112 not part.is_attachment() and subtype not in seen): 1113 seen.append(subtype) 1114 continue 1115 yield part 1116 1117 def iter_parts(self): 1118 """Return an iterator over all immediate subparts of a multipart. 1119 1120 Return an empty iterator for a non-multipart. 1121 """ 1122 if self.is_multipart(): 1123 yield from self.get_payload() 1124 1125 def get_content(self, *args, content_manager=None, **kw): 1126 if content_manager is None: 1127 content_manager = self.policy.content_manager 1128 return content_manager.get_content(self, *args, **kw) 1129 1130 def set_content(self, *args, content_manager=None, **kw): 1131 if content_manager is None: 1132 content_manager = self.policy.content_manager 1133 content_manager.set_content(self, *args, **kw) 1134 1135 def _make_multipart(self, subtype, disallowed_subtypes, boundary): 1136 if self.get_content_maintype() == 'multipart': 1137 existing_subtype = self.get_content_subtype() 1138 disallowed_subtypes = disallowed_subtypes + (subtype,) 1139 if existing_subtype in disallowed_subtypes: 1140 raise ValueError("Cannot convert {} to {}".format( 1141 existing_subtype, subtype)) 1142 keep_headers = [] 1143 part_headers = [] 1144 for name, value in self._headers: 1145 if name.lower().startswith('content-'): 1146 part_headers.append((name, value)) 1147 else: 1148 keep_headers.append((name, value)) 1149 if part_headers: 1150 # There is existing content, move it to the first subpart. 1151 part = type(self)(policy=self.policy) 1152 part._headers = part_headers 1153 part._payload = self._payload 1154 self._payload = [part] 1155 else: 1156 self._payload = [] 1157 self._headers = keep_headers 1158 self['Content-Type'] = 'multipart/' + subtype 1159 if boundary is not None: 1160 self.set_param('boundary', boundary) 1161 1162 def make_related(self, boundary=None): 1163 self._make_multipart('related', ('alternative', 'mixed'), boundary) 1164 1165 def make_alternative(self, boundary=None): 1166 self._make_multipart('alternative', ('mixed',), boundary) 1167 1168 def make_mixed(self, boundary=None): 1169 self._make_multipart('mixed', (), boundary) 1170 1171 def _add_multipart(self, _subtype, *args, _disp=None, **kw): 1172 if (self.get_content_maintype() != 'multipart' or 1173 self.get_content_subtype() != _subtype): 1174 getattr(self, 'make_' + _subtype)() 1175 part = type(self)(policy=self.policy) 1176 part.set_content(*args, **kw) 1177 if _disp and 'content-disposition' not in part: 1178 part['Content-Disposition'] = _disp 1179 self.attach(part) 1180 1181 def add_related(self, *args, **kw): 1182 self._add_multipart('related', *args, _disp='inline', **kw) 1183 1184 def add_alternative(self, *args, **kw): 1185 self._add_multipart('alternative', *args, **kw) 1186 1187 def add_attachment(self, *args, **kw): 1188 self._add_multipart('mixed', *args, _disp='attachment', **kw) 1189 1190 def clear(self): 1191 self._headers = [] 1192 self._payload = None 1193 1194 def clear_content(self): 1195 self._headers = [(n, v) for n, v in self._headers 1196 if not n.lower().startswith('content-')] 1197 self._payload = None 1198 1199 1200class EmailMessage(MIMEPart): 1201 1202 def set_content(self, *args, **kw): 1203 super().set_content(*args, **kw) 1204 if 'MIME-Version' not in self: 1205 self['MIME-Version'] = '1.0' 1206