• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message', 'EmailMessage']
8
9import binascii
10import re
11import quopri
12from io import BytesIO, StringIO
13
14# Intrapackage imports
15from email import utils
16from email import errors
17from email._policybase import compat32
18from email import charset as _charset
19from email._encoded_words import decode_b
20Charset = _charset.Charset
21
22SEMISPACE = '; '
23
24# Regular expression that matches `special' characters in parameters, the
25# existence of which force quoting of the parameter value.
26tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
27
28
29def _splitparam(param):
30    # Split header parameters.  BAW: this may be too simple.  It isn't
31    # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
32    # found in the wild.  We may eventually need a full fledged parser.
33    # RDM: we might have a Header here; for now just stringify it.
34    a, sep, b = str(param).partition(';')
35    if not sep:
36        return a.strip(), None
37    return a.strip(), b.strip()
38
39def _formatparam(param, value=None, quote=True):
40    """Convenience function to format and return a key=value pair.
41
42    This will quote the value if needed or if quote is true.  If value is a
43    three tuple (charset, language, value), it will be encoded according
44    to RFC2231 rules.  If it contains non-ascii characters it will likewise
45    be encoded according to RFC2231 rules, using the utf-8 charset and
46    a null language.
47    """
48    if value is not None and len(value) > 0:
49        # A tuple is used for RFC 2231 encoded parameter values where items
50        # are (charset, language, value).  charset is a string, not a Charset
51        # instance.  RFC 2231 encoded values are never quoted, per RFC.
52        if isinstance(value, tuple):
53            # Encode as per RFC 2231
54            param += '*'
55            value = utils.encode_rfc2231(value[2], value[0], value[1])
56            return '%s=%s' % (param, value)
57        else:
58            try:
59                value.encode('ascii')
60            except UnicodeEncodeError:
61                param += '*'
62                value = utils.encode_rfc2231(value, 'utf-8', '')
63                return '%s=%s' % (param, value)
64        # BAW: Please check this.  I think that if quote is set it should
65        # force quoting even if not necessary.
66        if quote or tspecials.search(value):
67            return '%s="%s"' % (param, utils.quote(value))
68        else:
69            return '%s=%s' % (param, value)
70    else:
71        return param
72
73def _parseparam(s):
74    # RDM This might be a Header, so for now stringify it.
75    s = ';' + str(s)
76    plist = []
77    while s[:1] == ';':
78        s = s[1:]
79        end = s.find(';')
80        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
81            end = s.find(';', end + 1)
82        if end < 0:
83            end = len(s)
84        f = s[:end]
85        if '=' in f:
86            i = f.index('=')
87            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
88        plist.append(f.strip())
89        s = s[end:]
90    return plist
91
92
93def _unquotevalue(value):
94    # This is different than utils.collapse_rfc2231_value() because it doesn't
95    # try to convert the value to a unicode.  Message.get_param() and
96    # Message.get_params() are both currently defined to return the tuple in
97    # the face of RFC 2231 parameters.
98    if isinstance(value, tuple):
99        return value[0], value[1], utils.unquote(value[2])
100    else:
101        return utils.unquote(value)
102
103
104def _decode_uu(encoded):
105    """Decode uuencoded data."""
106    decoded_lines = []
107    encoded_lines_iter = iter(encoded.splitlines())
108    for line in encoded_lines_iter:
109        if line.startswith(b"begin "):
110            mode, _, path = line.removeprefix(b"begin ").partition(b" ")
111            try:
112                int(mode, base=8)
113            except ValueError:
114                continue
115            else:
116                break
117    else:
118        raise ValueError("`begin` line not found")
119    for line in encoded_lines_iter:
120        if not line:
121            raise ValueError("Truncated input")
122        elif line.strip(b' \t\r\n\f') == b'end':
123            break
124        try:
125            decoded_line = binascii.a2b_uu(line)
126        except binascii.Error:
127            # Workaround for broken uuencoders by /Fredrik Lundh
128            nbytes = (((line[0]-32) & 63) * 4 + 5) // 3
129            decoded_line = binascii.a2b_uu(line[:nbytes])
130        decoded_lines.append(decoded_line)
131
132    return b''.join(decoded_lines)
133
134
135class Message:
136    """Basic message object.
137
138    A message object is defined as something that has a bunch of RFC 2822
139    headers and a payload.  It may optionally have an envelope header
140    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
141    multipart or a message/rfc822), then the payload is a list of Message
142    objects, otherwise it is a string.
143
144    Message objects implement part of the `mapping' interface, which assumes
145    there is exactly one occurrence of the header per message.  Some headers
146    do in fact appear multiple times (e.g. Received) and for those headers,
147    you must use the explicit API to set or get all the headers.  Not all of
148    the mapping methods are implemented.
149    """
150    def __init__(self, policy=compat32):
151        self.policy = policy
152        self._headers = []
153        self._unixfrom = None
154        self._payload = None
155        self._charset = None
156        # Defaults for multipart messages
157        self.preamble = self.epilogue = None
158        self.defects = []
159        # Default content type
160        self._default_type = 'text/plain'
161
162    def __str__(self):
163        """Return the entire formatted message as a string.
164        """
165        return self.as_string()
166
167    def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
168        """Return the entire formatted message as a string.
169
170        Optional 'unixfrom', when true, means include the Unix From_ envelope
171        header.  For backward compatibility reasons, if maxheaderlen is
172        not specified it defaults to 0, so you must override it explicitly
173        if you want a different maxheaderlen.  'policy' is passed to the
174        Generator instance used to serialize the message; if it is not
175        specified the policy associated with the message instance is used.
176
177        If the message object contains binary data that is not encoded
178        according to RFC standards, the non-compliant data will be replaced by
179        unicode "unknown character" code points.
180        """
181        from email.generator import Generator
182        policy = self.policy if policy is None else policy
183        fp = StringIO()
184        g = Generator(fp,
185                      mangle_from_=False,
186                      maxheaderlen=maxheaderlen,
187                      policy=policy)
188        g.flatten(self, unixfrom=unixfrom)
189        return fp.getvalue()
190
191    def __bytes__(self):
192        """Return the entire formatted message as a bytes object.
193        """
194        return self.as_bytes()
195
196    def as_bytes(self, unixfrom=False, policy=None):
197        """Return the entire formatted message as a bytes object.
198
199        Optional 'unixfrom', when true, means include the Unix From_ envelope
200        header.  'policy' is passed to the BytesGenerator instance used to
201        serialize the message; if not specified the policy associated with
202        the message instance is used.
203        """
204        from email.generator import BytesGenerator
205        policy = self.policy if policy is None else policy
206        fp = BytesIO()
207        g = BytesGenerator(fp, mangle_from_=False, policy=policy)
208        g.flatten(self, unixfrom=unixfrom)
209        return fp.getvalue()
210
211    def is_multipart(self):
212        """Return True if the message consists of multiple parts."""
213        return isinstance(self._payload, list)
214
215    #
216    # Unix From_ line
217    #
218    def set_unixfrom(self, unixfrom):
219        self._unixfrom = unixfrom
220
221    def get_unixfrom(self):
222        return self._unixfrom
223
224    #
225    # Payload manipulation.
226    #
227    def attach(self, payload):
228        """Add the given payload to the current payload.
229
230        The current payload will always be a list of objects after this method
231        is called.  If you want to set the payload to a scalar object, use
232        set_payload() instead.
233        """
234        if self._payload is None:
235            self._payload = [payload]
236        else:
237            try:
238                self._payload.append(payload)
239            except AttributeError:
240                raise TypeError("Attach is not valid on a message with a"
241                                " non-multipart payload")
242
243    def get_payload(self, i=None, decode=False):
244        """Return a reference to the payload.
245
246        The payload will either be a list object or a string.  If you mutate
247        the list object, you modify the message's payload in place.  Optional
248        i returns that index into the payload.
249
250        Optional decode is a flag indicating whether the payload should be
251        decoded or not, according to the Content-Transfer-Encoding header
252        (default is False).
253
254        When True and the message is not a multipart, the payload will be
255        decoded if this header's value is `quoted-printable' or `base64'.  If
256        some other encoding is used, or the header is missing, or if the
257        payload has bogus data (i.e. bogus base64 or uuencoded data), the
258        payload is returned as-is.
259
260        If the message is a multipart and the decode flag is True, then None
261        is returned.
262        """
263        # Here is the logic table for this code, based on the email5.0.0 code:
264        #   i     decode  is_multipart  result
265        # ------  ------  ------------  ------------------------------
266        #  None   True    True          None
267        #   i     True    True          None
268        #  None   False   True          _payload (a list)
269        #   i     False   True          _payload element i (a Message)
270        #   i     False   False         error (not a list)
271        #   i     True    False         error (not a list)
272        #  None   False   False         _payload
273        #  None   True    False         _payload decoded (bytes)
274        # Note that Barry planned to factor out the 'decode' case, but that
275        # isn't so easy now that we handle the 8 bit data, which needs to be
276        # converted in both the decode and non-decode path.
277        if self.is_multipart():
278            if decode:
279                return None
280            if i is None:
281                return self._payload
282            else:
283                return self._payload[i]
284        # For backward compatibility, Use isinstance and this error message
285        # instead of the more logical is_multipart test.
286        if i is not None and not isinstance(self._payload, list):
287            raise TypeError('Expected list, got %s' % type(self._payload))
288        payload = self._payload
289        # cte might be a Header, so for now stringify it.
290        cte = str(self.get('content-transfer-encoding', '')).lower()
291        # payload may be bytes here.
292        if not decode:
293            if isinstance(payload, str) and utils._has_surrogates(payload):
294                try:
295                    bpayload = payload.encode('ascii', 'surrogateescape')
296                    try:
297                        payload = bpayload.decode(self.get_content_charset('ascii'), 'replace')
298                    except LookupError:
299                        payload = bpayload.decode('ascii', 'replace')
300                except UnicodeEncodeError:
301                    pass
302            return payload
303        if isinstance(payload, str):
304            try:
305                bpayload = payload.encode('ascii', 'surrogateescape')
306            except UnicodeEncodeError:
307                # This won't happen for RFC compliant messages (messages
308                # containing only ASCII code points in the unicode input).
309                # If it does happen, turn the string into bytes in a way
310                # guaranteed not to fail.
311                bpayload = payload.encode('raw-unicode-escape')
312        if cte == 'quoted-printable':
313            return quopri.decodestring(bpayload)
314        elif cte == 'base64':
315            # XXX: this is a bit of a hack; decode_b should probably be factored
316            # out somewhere, but I haven't figured out where yet.
317            value, defects = decode_b(b''.join(bpayload.splitlines()))
318            for defect in defects:
319                self.policy.handle_defect(self, defect)
320            return value
321        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
322            try:
323                return _decode_uu(bpayload)
324            except ValueError:
325                # Some decoding problem.
326                return bpayload
327        if isinstance(payload, str):
328            return bpayload
329        return payload
330
331    def set_payload(self, payload, charset=None):
332        """Set the payload to the given value.
333
334        Optional charset sets the message's default character set.  See
335        set_charset() for details.
336        """
337        if hasattr(payload, 'encode'):
338            if charset is None:
339                self._payload = payload
340                return
341            if not isinstance(charset, Charset):
342                charset = Charset(charset)
343            payload = payload.encode(charset.output_charset, 'surrogateescape')
344        if hasattr(payload, 'decode'):
345            self._payload = payload.decode('ascii', 'surrogateescape')
346        else:
347            self._payload = payload
348        if charset is not None:
349            self.set_charset(charset)
350
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        # None means "forget the charset": drop the Content-Type parameter
        # and the stored Charset, and touch nothing else.
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        # Add the MIME headers that are still missing; an existing
        # Content-Type only has its charset parameter updated.
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        # Re-encode the body when the Charset does not compare equal to its
        # own output charset.
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        # An existing Content-Transfer-Encoding header is left alone.
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # NOTE(review): presumably cte is either a callable that encodes
            # the message in place or a plain encoding name; a non-callable
            # makes the call raise TypeError and lands in the fallback below
            # -- confirm against Charset.get_body_encoding().
            try:
                cte(self)
            except TypeError:
                # This 'if' is for backward compatibility, it allows unicode
                # through even though that won't work correctly if the
                # message is serialized.
                payload = self._payload
                if payload:
                    try:
                        payload = payload.encode('ascii', 'surrogateescape')
                    except UnicodeError:
                        payload = payload.encode(charset.output_charset)
                self._payload = charset.body_encode(payload)
                self.add_header('Content-Transfer-Encoding', cte)
397
398    def get_charset(self):
399        """Return the Charset instance associated with the message's payload.
400        """
401        return self._charset
402
403    #
404    # MAPPING INTERFACE (partial)
405    #
406    def __len__(self):
407        """Return the total number of headers, including duplicates."""
408        return len(self._headers)
409
410    def __getitem__(self, name):
411        """Get a header value.
412
413        Return None if the header is missing instead of raising an exception.
414
415        Note that if the header appeared multiple times, exactly which
416        occurrence gets returned is undefined.  Use get_all() to get all
417        the values matching a header field name.
418        """
419        return self.get(name)
420
421    def __setitem__(self, name, val):
422        """Set the value of a header.
423
424        Note: this does not overwrite an existing header with the same field
425        name.  Use __delitem__() first to delete any existing headers.
426        """
427        max_count = self.policy.header_max_count(name)
428        if max_count:
429            lname = name.lower()
430            found = 0
431            for k, v in self._headers:
432                if k.lower() == lname:
433                    found += 1
434                    if found >= max_count:
435                        raise ValueError("There may be at most {} {} headers "
436                                         "in a message".format(max_count, name))
437        self._headers.append(self.policy.header_store_parse(name, val))
438
439    def __delitem__(self, name):
440        """Delete all occurrences of a header, if present.
441
442        Does not raise an exception if the header is missing.
443        """
444        name = name.lower()
445        newheaders = []
446        for k, v in self._headers:
447            if k.lower() != name:
448                newheaders.append((k, v))
449        self._headers = newheaders
450
451    def __contains__(self, name):
452        name_lower = name.lower()
453        for k, v in self._headers:
454            if name_lower == k.lower():
455                return True
456        return False
457
458    def __iter__(self):
459        for field, value in self._headers:
460            yield field
461
462    def keys(self):
463        """Return a list of all the message's header field names.
464
465        These will be sorted in the order they appeared in the original
466        message, or were added to the message, and may contain duplicates.
467        Any fields deleted and re-inserted are always appended to the header
468        list.
469        """
470        return [k for k, v in self._headers]
471
472    def values(self):
473        """Return a list of all the message's header values.
474
475        These will be sorted in the order they appeared in the original
476        message, or were added to the message, and may contain duplicates.
477        Any fields deleted and re-inserted are always appended to the header
478        list.
479        """
480        return [self.policy.header_fetch_parse(k, v)
481                for k, v in self._headers]
482
483    def items(self):
484        """Get all the message's header fields and values.
485
486        These will be sorted in the order they appeared in the original
487        message, or were added to the message, and may contain duplicates.
488        Any fields deleted and re-inserted are always appended to the header
489        list.
490        """
491        return [(k, self.policy.header_fetch_parse(k, v))
492                for k, v in self._headers]
493
494    def get(self, name, failobj=None):
495        """Get a header value.
496
497        Like __getitem__() but return failobj instead of None when the field
498        is missing.
499        """
500        name = name.lower()
501        for k, v in self._headers:
502            if k.lower() == name:
503                return self.policy.header_fetch_parse(k, v)
504        return failobj
505
    #
    # "Internal" methods (public API, but only intended for use by a parser
    # or generator, not normal application code).
    #
510
511    def set_raw(self, name, value):
512        """Store name and value in the model without modification.
513
514        This is an "internal" API, intended only for use by a parser.
515        """
516        self._headers.append((name, value))
517
518    def raw_items(self):
519        """Return the (name, value) header pairs without modification.
520
521        This is an "internal" API, intended only for use by a generator.
522        """
523        return iter(self._headers.copy())
524
525    #
526    # Additional useful stuff
527    #
528
529    def get_all(self, name, failobj=None):
530        """Return a list of all the values for the named field.
531
532        These will be sorted in the order they appeared in the original
533        message, and may contain duplicates.  Any fields deleted and
534        re-inserted are always appended to the header list.
535
536        If no such fields exist, failobj is returned (defaults to None).
537        """
538        values = []
539        name = name.lower()
540        for k, v in self._headers:
541            if k.lower() == name:
542                values.append(self.policy.header_fetch_parse(k, v))
543        if not values:
544            return failobj
545        return values
546
547    def add_header(self, _name, _value, **_params):
548        """Extended header setting.
549
550        name is the header field to add.  keyword arguments can be used to set
551        additional parameters for the header field, with underscores converted
552        to dashes.  Normally the parameter will be added as key="value" unless
553        value is None, in which case only the key will be added.  If a
554        parameter value contains non-ASCII characters it can be specified as a
555        three-tuple of (charset, language, value), in which case it will be
556        encoded according to RFC2231 rules.  Otherwise it will be encoded using
557        the utf-8 charset and a language of ''.
558
559        Examples:
560
561        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
562        msg.add_header('content-disposition', 'attachment',
563                       filename=('utf-8', '', Fußballer.ppt'))
564        msg.add_header('content-disposition', 'attachment',
565                       filename='Fußballer.ppt'))
566        """
567        parts = []
568        for k, v in _params.items():
569            if v is None:
570                parts.append(k.replace('_', '-'))
571            else:
572                parts.append(_formatparam(k.replace('_', '-'), v))
573        if _value is not None:
574            parts.insert(0, _value)
575        self[_name] = SEMISPACE.join(parts)
576
577    def replace_header(self, _name, _value):
578        """Replace a header.
579
580        Replace the first matching header found in the message, retaining
581        header order and case.  If no matching header was found, a KeyError is
582        raised.
583        """
584        _name = _name.lower()
585        for i, (k, v) in zip(range(len(self._headers)), self._headers):
586            if k.lower() == _name:
587                self._headers[i] = self.policy.header_store_parse(k, _value)
588                break
589        else:
590            raise KeyError(_name)
591
592    #
593    # Use these three methods instead of the three above.
594    #
595
596    def get_content_type(self):
597        """Return the message's content type.
598
599        The returned string is coerced to lower case of the form
600        `maintype/subtype'.  If there was no Content-Type header in the
601        message, the default type as given by get_default_type() will be
602        returned.  Since according to RFC 2045, messages always have a default
603        type this will always return a value.
604
605        RFC 2045 defines a message's default type to be text/plain unless it
606        appears inside a multipart/digest container, in which case it would be
607        message/rfc822.
608        """
609        missing = object()
610        value = self.get('content-type', missing)
611        if value is missing:
612            # This should have no parameters
613            return self.get_default_type()
614        ctype = _splitparam(value)[0].lower()
615        # RFC 2045, section 5.2 says if its invalid, use text/plain
616        if ctype.count('/') != 1:
617            return 'text/plain'
618        return ctype
619
620    def get_content_maintype(self):
621        """Return the message's main content type.
622
623        This is the `maintype' part of the string returned by
624        get_content_type().
625        """
626        ctype = self.get_content_type()
627        return ctype.split('/')[0]
628
629    def get_content_subtype(self):
630        """Returns the message's sub-content type.
631
632        This is the `subtype' part of the string returned by
633        get_content_type().
634        """
635        ctype = self.get_content_type()
636        return ctype.split('/')[1]
637
638    def get_default_type(self):
639        """Return the `default' content type.
640
641        Most messages have a default content type of text/plain, except for
642        messages that are subparts of multipart/digest containers.  Such
643        subparts have a default content type of message/rfc822.
644        """
645        return self._default_type
646
647    def set_default_type(self, ctype):
648        """Set the `default' content type.
649
650        ctype should be either "text/plain" or "message/rfc822", although this
651        is not enforced.  The default content type is not stored in the
652        Content-Type header.
653        """
654        self._default_type = ctype
655
656    def _get_params_preserve(self, failobj, header):
657        # Like get_params() but preserves the quoting of values.  BAW:
658        # should this be part of the public interface?
659        missing = object()
660        value = self.get(header, missing)
661        if value is missing:
662            return failobj
663        params = []
664        for p in _parseparam(value):
665            try:
666                name, val = p.split('=', 1)
667                name = name.strip()
668                val = val.strip()
669            except ValueError:
670                # Must have been a bare attribute
671                name = p.strip()
672                val = ''
673            params.append((name, val))
674        params = utils.decode_params(params)
675        return params
676
677    def get_params(self, failobj=None, header='content-type', unquote=True):
678        """Return the message's Content-Type parameters, as a list.
679
680        The elements of the returned list are 2-tuples of key/value pairs, as
681        split on the `=' sign.  The left hand side of the `=' is the key,
682        while the right hand side is the value.  If there is no `=' sign in
683        the parameter the value is the empty string.  The value is as
684        described in the get_param() method.
685
686        Optional failobj is the object to return if there is no Content-Type
687        header.  Optional header is the header to search instead of
688        Content-Type.  If unquote is True, the value is unquoted.
689        """
690        missing = object()
691        params = self._get_params_preserve(missing, header)
692        if params is missing:
693            return failobj
694        if unquote:
695            return [(k, _unquotevalue(v)) for k, v in params]
696        else:
697            return params
698
699    def get_param(self, param, failobj=None, header='content-type',
700                  unquote=True):
701        """Return the parameter value if found in the Content-Type header.
702
703        Optional failobj is the object to return if there is no Content-Type
704        header, or the Content-Type header has no such parameter.  Optional
705        header is the header to search instead of Content-Type.
706
707        Parameter keys are always compared case insensitively.  The return
708        value can either be a string, or a 3-tuple if the parameter was RFC
709        2231 encoded.  When it's a 3-tuple, the elements of the value are of
710        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
711        LANGUAGE can be None, in which case you should consider VALUE to be
712        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
713        The parameter value (either the returned string, or the VALUE item in
714        the 3-tuple) is always unquoted, unless unquote is set to False.
715
716        If your application doesn't care whether the parameter was RFC 2231
717        encoded, it can turn the return value into a string as follows:
718
719            rawparam = msg.get_param('foo')
720            param = email.utils.collapse_rfc2231_value(rawparam)
721
722        """
723        if header not in self:
724            return failobj
725        for k, v in self._get_params_preserve(failobj, header):
726            if k.lower() == param.lower():
727                if unquote:
728                    return _unquotevalue(v)
729                else:
730                    return v
731        return failobj
732
733    def set_param(self, param, value, header='Content-Type', requote=True,
734                  charset=None, language='', replace=False):
735        """Set a parameter in the Content-Type header.
736
737        If the parameter already exists in the header, its value will be
738        replaced with the new value.
739
740        If header is Content-Type and has not yet been defined for this
741        message, it will be set to "text/plain" and the new parameter and
742        value will be appended as per RFC 2045.
743
744        An alternate header can be specified in the header argument, and all
745        parameters will be quoted as necessary unless requote is False.
746
747        If charset is specified, the parameter will be encoded according to RFC
748        2231.  Optional language specifies the RFC 2231 language, defaulting
749        to the empty string.  Both charset and language should be strings.
750        """
751        if not isinstance(value, tuple) and charset:
752            value = (charset, language, value)
753
754        if header not in self and header.lower() == 'content-type':
755            ctype = 'text/plain'
756        else:
757            ctype = self.get(header)
758        if not self.get_param(param, header=header):
759            if not ctype:
760                ctype = _formatparam(param, value, requote)
761            else:
762                ctype = SEMISPACE.join(
763                    [ctype, _formatparam(param, value, requote)])
764        else:
765            ctype = ''
766            for old_param, old_value in self.get_params(header=header,
767                                                        unquote=requote):
768                append_param = ''
769                if old_param.lower() == param.lower():
770                    append_param = _formatparam(param, value, requote)
771                else:
772                    append_param = _formatparam(old_param, old_value, requote)
773                if not ctype:
774                    ctype = append_param
775                else:
776                    ctype = SEMISPACE.join([ctype, append_param])
777        if ctype != self.get(header):
778            if replace:
779                self.replace_header(header, ctype)
780            else:
781                del self[header]
782                self[header] = ctype
783
784    def del_param(self, param, header='content-type', requote=True):
785        """Remove the given parameter completely from the Content-Type header.
786
787        The header will be re-written in place without the parameter or its
788        value. All values will be quoted as necessary unless requote is
789        False.  Optional header specifies an alternative to the Content-Type
790        header.
791        """
792        if header not in self:
793            return
794        new_ctype = ''
795        for p, v in self.get_params(header=header, unquote=requote):
796            if p.lower() != param.lower():
797                if not new_ctype:
798                    new_ctype = _formatparam(p, v, requote)
799                else:
800                    new_ctype = SEMISPACE.join([new_ctype,
801                                                _formatparam(p, v, requote)])
802        if new_ctype != self.get(header):
803            del self[header]
804            self[header] = new_ctype
805
806    def set_type(self, type, header='Content-Type', requote=True):
807        """Set the main type and subtype for the Content-Type header.
808
809        type must be a string in the form "maintype/subtype", otherwise a
810        ValueError is raised.
811
812        This method replaces the Content-Type header, keeping all the
813        parameters in place.  If requote is False, this leaves the existing
814        header's quoting as is.  Otherwise, the parameters will be quoted (the
815        default).
816
817        An alternative header can be specified in the header argument.  When
818        the Content-Type header is set, we'll always also add a MIME-Version
819        header.
820        """
821        # BAW: should we be strict?
822        if not type.count('/') == 1:
823            raise ValueError
824        # Set the Content-Type, you get a MIME-Version
825        if header.lower() == 'content-type':
826            del self['mime-version']
827            self['MIME-Version'] = '1.0'
828        if header not in self:
829            self[header] = type
830            return
831        params = self.get_params(header=header, unquote=requote)
832        del self[header]
833        self[header] = type
834        # Skip the first param; it's the old type.
835        for p, v in params[1:]:
836            self.set_param(p, v, header, requote)
837
838    def get_filename(self, failobj=None):
839        """Return the filename associated with the payload if present.
840
841        The filename is extracted from the Content-Disposition header's
842        `filename' parameter, and it is unquoted.  If that header is missing
843        the `filename' parameter, this method falls back to looking for the
844        `name' parameter.
845        """
846        missing = object()
847        filename = self.get_param('filename', missing, 'content-disposition')
848        if filename is missing:
849            filename = self.get_param('name', missing, 'content-type')
850        if filename is missing:
851            return failobj
852        return utils.collapse_rfc2231_value(filename).strip()
853
854    def get_boundary(self, failobj=None):
855        """Return the boundary associated with the payload if present.
856
857        The boundary is extracted from the Content-Type header's `boundary'
858        parameter, and it is unquoted.
859        """
860        missing = object()
861        boundary = self.get_param('boundary', missing)
862        if boundary is missing:
863            return failobj
864        # RFC 2046 says that boundaries may begin but not end in w/s
865        return utils.collapse_rfc2231_value(boundary).rstrip()
866
867    def set_boundary(self, boundary):
868        """Set the boundary parameter in Content-Type to 'boundary'.
869
870        This is subtly different than deleting the Content-Type header and
871        adding a new one with a new boundary parameter via add_header().  The
872        main difference is that using the set_boundary() method preserves the
873        order of the Content-Type header in the original message.
874
875        HeaderParseError is raised if the message has no Content-Type header.
876        """
877        missing = object()
878        params = self._get_params_preserve(missing, 'content-type')
879        if params is missing:
880            # There was no Content-Type header, and we don't know what type
881            # to set it to, so raise an exception.
882            raise errors.HeaderParseError('No Content-Type header found')
883        newparams = []
884        foundp = False
885        for pk, pv in params:
886            if pk.lower() == 'boundary':
887                newparams.append(('boundary', '"%s"' % boundary))
888                foundp = True
889            else:
890                newparams.append((pk, pv))
891        if not foundp:
892            # The original Content-Type header had no boundary attribute.
893            # Tack one on the end.  BAW: should we raise an exception
894            # instead???
895            newparams.append(('boundary', '"%s"' % boundary))
896        # Replace the existing Content-Type header with the new value
897        newheaders = []
898        for h, v in self._headers:
899            if h.lower() == 'content-type':
900                parts = []
901                for k, v in newparams:
902                    if v == '':
903                        parts.append(k)
904                    else:
905                        parts.append('%s=%s' % (k, v))
906                val = SEMISPACE.join(parts)
907                newheaders.append(self.policy.header_store_parse(h, val))
908
909            else:
910                newheaders.append((h, v))
911        self._headers = newheaders
912
913    def get_content_charset(self, failobj=None):
914        """Return the charset parameter of the Content-Type header.
915
916        The returned string is always coerced to lower case.  If there is no
917        Content-Type header, or if that header has no charset parameter,
918        failobj is returned.
919        """
920        missing = object()
921        charset = self.get_param('charset', missing)
922        if charset is missing:
923            return failobj
924        if isinstance(charset, tuple):
925            # RFC 2231 encoded, so decode it, and it better end up as ascii.
926            pcharset = charset[0] or 'us-ascii'
927            try:
928                # LookupError will be raised if the charset isn't known to
929                # Python.  UnicodeError will be raised if the encoded text
930                # contains a character not in the charset.
931                as_bytes = charset[2].encode('raw-unicode-escape')
932                charset = str(as_bytes, pcharset)
933            except (LookupError, UnicodeError):
934                charset = charset[2]
935        # charset characters must be in us-ascii range
936        try:
937            charset.encode('us-ascii')
938        except UnicodeError:
939            return failobj
940        # RFC 2046, $4.1.2 says charsets are not case sensitive
941        return charset.lower()
942
943    def get_charsets(self, failobj=None):
944        """Return a list containing the charset(s) used in this message.
945
946        The returned list of items describes the Content-Type headers'
947        charset parameter for this message and all the subparts in its
948        payload.
949
950        Each item will either be a string (the value of the charset parameter
951        in the Content-Type header of that part) or the value of the
952        'failobj' parameter (defaults to None), if the part does not have a
953        main MIME type of "text", or the charset is not defined.
954
955        The list will contain one string for each part of the message, plus
956        one for the container message (i.e. self), so that a non-multipart
957        message will still return a list of length 1.
958        """
959        return [part.get_content_charset(failobj) for part in self.walk()]
960
961    def get_content_disposition(self):
962        """Return the message's content-disposition if it exists, or None.
963
964        The return values can be either 'inline', 'attachment' or None
965        according to the rfc2183.
966        """
967        value = self.get('content-disposition')
968        if value is None:
969            return None
970        c_d = _splitparam(value)[0].lower()
971        return c_d
972
973    # I.e. def walk(self): ...
974    from email.iterators import walk
975
976
class MIMEPart(Message):
    """Message subclass adding body/attachment helpers and content managers.

    When no policy is supplied, email.policy.default is used.
    """

    def __init__(self, policy=None):
        """Initialize the part; policy defaults to email.policy.default."""
        if policy is None:
            from email.policy import default
            policy = default
        super().__init__(policy)

    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        return super().as_string(unixfrom, maxheaderlen, policy)

    def __str__(self):
        """Return as_string() using a utf8=True clone of the policy."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if the Content-Disposition is 'attachment'.

        Returns False when there is no Content-Disposition header.  The
        header value is read through its content_disposition attribute, so
        the header must be an object that provides it.
        """
        c_d = self.get('content-disposition')
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) body candidates below part, depth first.

        priority is the index within preferencelist of the entry that the
        candidate matched.  Parts marked as attachments are never yielded.
        """
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        # The part under examination (not self) must really hold a list of
        # subparts; a malformed part can declare multipart/* yet carry a
        # string payload, which must not be indexed as if it were a list.
        if maintype != 'multipart' or not part.is_multipart():
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # Look inside the related container for its root part: the subpart
        # whose Content-ID matches 'start', else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred type.
                    break
        return body

    # Content types that iter_attachments() treats as body candidates.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart, or to a multipart that has no subparts.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to `multipart/*`
        # but still have single part body, in which case payload.copy() can
        # fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slicing (rather than pop(0)) keeps an empty multipart/related
            # from raising IndexError; it simply has no attachments.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart.
        """
        if self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return the content via content_manager.get_content(self, ...).

        content_manager defaults to the policy's content_manager; all other
        arguments are passed through to it.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Set the content via content_manager.set_content(self, ...).

        content_manager defaults to the policy's content_manager; all other
        arguments are passed through to it.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        Existing Content-* headers and the payload are moved into a new first
        subpart; all other headers stay on this part.  ValueError is raised
        if the part is already a multipart whose subtype is subtype itself or
        one of disallowed_subtypes.  A boundary parameter is set only when
        boundary is not None.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert this part into a multipart/related container.

        ValueError is raised if it is already multipart/related,
        multipart/alternative or multipart/mixed.
        """
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert this part into a multipart/alternative container.

        ValueError is raised if it is already multipart/alternative or
        multipart/mixed.
        """
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert this part into a multipart/mixed container.

        ValueError is raised if it is already multipart/mixed.
        """
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, converting to multipart/<_subtype> if needed.

        The new part is of the same class and policy as self and is filled
        with set_content(*args, **kw).  If _disp is given and the new part
        has no Content-Disposition header, one is added with that value.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a multipart/related subpart, defaulting to 'inline'."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a multipart/alternative subpart."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a multipart/mixed subpart, defaulting to 'attachment'."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts 'Content-'."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1198
1199
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content() also guarantees a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content, then add 'MIME-Version: 1.0' if it is missing.

        All arguments are forwarded unchanged to MIMEPart.set_content().
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'
1206