• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message', 'EmailMessage']
8
9import re
10import uu
11import quopri
12from io import BytesIO, StringIO
13
14# Intrapackage imports
15from email import utils
16from email import errors
17from email._policybase import Policy, compat32
18from email import charset as _charset
19from email._encoded_words import decode_b
20Charset = _charset.Charset
21
22SEMISPACE = '; '
23
# Regular expression that matches "special" characters in parameters, the
# existence of which forces quoting of the parameter value.
26tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
27
28
29def _splitparam(param):
30    # Split header parameters.  BAW: this may be too simple.  It isn't
31    # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
32    # found in the wild.  We may eventually need a full fledged parser.
33    # RDM: we might have a Header here; for now just stringify it.
34    a, sep, b = str(param).partition(';')
35    if not sep:
36        return a.strip(), None
37    return a.strip(), b.strip()
38
39def _formatparam(param, value=None, quote=True):
40    """Convenience function to format and return a key=value pair.
41
42    This will quote the value if needed or if quote is true.  If value is a
43    three tuple (charset, language, value), it will be encoded according
44    to RFC2231 rules.  If it contains non-ascii characters it will likewise
45    be encoded according to RFC2231 rules, using the utf-8 charset and
46    a null language.
47    """
48    if value is not None and len(value) > 0:
49        # A tuple is used for RFC 2231 encoded parameter values where items
50        # are (charset, language, value).  charset is a string, not a Charset
51        # instance.  RFC 2231 encoded values are never quoted, per RFC.
52        if isinstance(value, tuple):
53            # Encode as per RFC 2231
54            param += '*'
55            value = utils.encode_rfc2231(value[2], value[0], value[1])
56            return '%s=%s' % (param, value)
57        else:
58            try:
59                value.encode('ascii')
60            except UnicodeEncodeError:
61                param += '*'
62                value = utils.encode_rfc2231(value, 'utf-8', '')
63                return '%s=%s' % (param, value)
64        # BAW: Please check this.  I think that if quote is set it should
65        # force quoting even if not necessary.
66        if quote or tspecials.search(value):
67            return '%s="%s"' % (param, utils.quote(value))
68        else:
69            return '%s=%s' % (param, value)
70    else:
71        return param
72
73def _parseparam(s):
74    # RDM This might be a Header, so for now stringify it.
75    s = ';' + str(s)
76    plist = []
77    while s[:1] == ';':
78        s = s[1:]
79        end = s.find(';')
80        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
81            end = s.find(';', end + 1)
82        if end < 0:
83            end = len(s)
84        f = s[:end]
85        if '=' in f:
86            i = f.index('=')
87            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
88        plist.append(f.strip())
89        s = s[end:]
90    return plist
91
92
93def _unquotevalue(value):
94    # This is different than utils.collapse_rfc2231_value() because it doesn't
95    # try to convert the value to a unicode.  Message.get_param() and
96    # Message.get_params() are both currently defined to return the tuple in
97    # the face of RFC 2231 parameters.
98    if isinstance(value, tuple):
99        return value[0], value[1], utils.unquote(value[2])
100    else:
101        return utils.unquote(value)
102
103
104
105class Message:
106    """Basic message object.
107
108    A message object is defined as something that has a bunch of RFC 2822
109    headers and a payload.  It may optionally have an envelope header
110    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
111    multipart or a message/rfc822), then the payload is a list of Message
112    objects, otherwise it is a string.
113
114    Message objects implement part of the `mapping' interface, which assumes
115    there is exactly one occurrence of the header per message.  Some headers
116    do in fact appear multiple times (e.g. Received) and for those headers,
117    you must use the explicit API to set or get all the headers.  Not all of
118    the mapping methods are implemented.
119    """
120    def __init__(self, policy=compat32):
121        self.policy = policy
122        self._headers = []
123        self._unixfrom = None
124        self._payload = None
125        self._charset = None
126        # Defaults for multipart messages
127        self.preamble = self.epilogue = None
128        self.defects = []
129        # Default content type
130        self._default_type = 'text/plain'
131
132    def __str__(self):
133        """Return the entire formatted message as a string.
134        """
135        return self.as_string()
136
137    def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
138        """Return the entire formatted message as a string.
139
140        Optional 'unixfrom', when true, means include the Unix From_ envelope
141        header.  For backward compatibility reasons, if maxheaderlen is
142        not specified it defaults to 0, so you must override it explicitly
143        if you want a different maxheaderlen.  'policy' is passed to the
144        Generator instance used to serialize the message; if it is not
145        specified the policy associated with the message instance is used.
146
147        If the message object contains binary data that is not encoded
148        according to RFC standards, the non-compliant data will be replaced by
149        unicode "unknown character" code points.
150        """
151        from email.generator import Generator
152        policy = self.policy if policy is None else policy
153        fp = StringIO()
154        g = Generator(fp,
155                      mangle_from_=False,
156                      maxheaderlen=maxheaderlen,
157                      policy=policy)
158        g.flatten(self, unixfrom=unixfrom)
159        return fp.getvalue()
160
161    def __bytes__(self):
162        """Return the entire formatted message as a bytes object.
163        """
164        return self.as_bytes()
165
166    def as_bytes(self, unixfrom=False, policy=None):
167        """Return the entire formatted message as a bytes object.
168
169        Optional 'unixfrom', when true, means include the Unix From_ envelope
170        header.  'policy' is passed to the BytesGenerator instance used to
171        serialize the message; if not specified the policy associated with
172        the message instance is used.
173        """
174        from email.generator import BytesGenerator
175        policy = self.policy if policy is None else policy
176        fp = BytesIO()
177        g = BytesGenerator(fp, mangle_from_=False, policy=policy)
178        g.flatten(self, unixfrom=unixfrom)
179        return fp.getvalue()
180
181    def is_multipart(self):
182        """Return True if the message consists of multiple parts."""
183        return isinstance(self._payload, list)
184
185    #
186    # Unix From_ line
187    #
188    def set_unixfrom(self, unixfrom):
189        self._unixfrom = unixfrom
190
191    def get_unixfrom(self):
192        return self._unixfrom
193
194    #
195    # Payload manipulation.
196    #
197    def attach(self, payload):
198        """Add the given payload to the current payload.
199
200        The current payload will always be a list of objects after this method
201        is called.  If you want to set the payload to a scalar object, use
202        set_payload() instead.
203        """
204        if self._payload is None:
205            self._payload = [payload]
206        else:
207            try:
208                self._payload.append(payload)
209            except AttributeError:
210                raise TypeError("Attach is not valid on a message with a"
211                                " non-multipart payload")
212
213    def get_payload(self, i=None, decode=False):
214        """Return a reference to the payload.
215
216        The payload will either be a list object or a string.  If you mutate
217        the list object, you modify the message's payload in place.  Optional
218        i returns that index into the payload.
219
220        Optional decode is a flag indicating whether the payload should be
221        decoded or not, according to the Content-Transfer-Encoding header
222        (default is False).
223
224        When True and the message is not a multipart, the payload will be
225        decoded if this header's value is `quoted-printable' or `base64'.  If
226        some other encoding is used, or the header is missing, or if the
227        payload has bogus data (i.e. bogus base64 or uuencoded data), the
228        payload is returned as-is.
229
230        If the message is a multipart and the decode flag is True, then None
231        is returned.
232        """
233        # Here is the logic table for this code, based on the email5.0.0 code:
234        #   i     decode  is_multipart  result
235        # ------  ------  ------------  ------------------------------
236        #  None   True    True          None
237        #   i     True    True          None
238        #  None   False   True          _payload (a list)
239        #   i     False   True          _payload element i (a Message)
240        #   i     False   False         error (not a list)
241        #   i     True    False         error (not a list)
242        #  None   False   False         _payload
243        #  None   True    False         _payload decoded (bytes)
244        # Note that Barry planned to factor out the 'decode' case, but that
245        # isn't so easy now that we handle the 8 bit data, which needs to be
246        # converted in both the decode and non-decode path.
247        if self.is_multipart():
248            if decode:
249                return None
250            if i is None:
251                return self._payload
252            else:
253                return self._payload[i]
254        # For backward compatibility, Use isinstance and this error message
255        # instead of the more logical is_multipart test.
256        if i is not None and not isinstance(self._payload, list):
257            raise TypeError('Expected list, got %s' % type(self._payload))
258        payload = self._payload
259        # cte might be a Header, so for now stringify it.
260        cte = str(self.get('content-transfer-encoding', '')).lower()
261        # payload may be bytes here.
262        if isinstance(payload, str):
263            if utils._has_surrogates(payload):
264                bpayload = payload.encode('ascii', 'surrogateescape')
265                if not decode:
266                    try:
267                        payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
268                    except LookupError:
269                        payload = bpayload.decode('ascii', 'replace')
270            elif decode:
271                try:
272                    bpayload = payload.encode('ascii')
273                except UnicodeError:
274                    # This won't happen for RFC compliant messages (messages
275                    # containing only ASCII code points in the unicode input).
276                    # If it does happen, turn the string into bytes in a way
277                    # guaranteed not to fail.
278                    bpayload = payload.encode('raw-unicode-escape')
279        if not decode:
280            return payload
281        if cte == 'quoted-printable':
282            return quopri.decodestring(bpayload)
283        elif cte == 'base64':
284            # XXX: this is a bit of a hack; decode_b should probably be factored
285            # out somewhere, but I haven't figured out where yet.
286            value, defects = decode_b(b''.join(bpayload.splitlines()))
287            for defect in defects:
288                self.policy.handle_defect(self, defect)
289            return value
290        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
291            in_file = BytesIO(bpayload)
292            out_file = BytesIO()
293            try:
294                uu.decode(in_file, out_file, quiet=True)
295                return out_file.getvalue()
296            except uu.Error:
297                # Some decoding problem
298                return bpayload
299        if isinstance(payload, str):
300            return bpayload
301        return payload
302
303    def set_payload(self, payload, charset=None):
304        """Set the payload to the given value.
305
306        Optional charset sets the message's default character set.  See
307        set_charset() for details.
308        """
309        if hasattr(payload, 'encode'):
310            if charset is None:
311                self._payload = payload
312                return
313            if not isinstance(charset, Charset):
314                charset = Charset(charset)
315            payload = payload.encode(charset.output_charset)
316        if hasattr(payload, 'decode'):
317            self._payload = payload.decode('ascii', 'surrogateescape')
318        else:
319            self._payload = payload
320        if charset is not None:
321            self.set_charset(charset)
322
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field and the stored Charset is cleared.  Anything else
        will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            # Clearing the charset: drop the parameter and forget the Charset.
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        # Add the MIME headers that are not already present.
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            # Keep the existing type; only (re)write its charset parameter.
            self.set_param('charset', charset.get_output_charset())
        # Re-encode the payload when the Charset does not compare equal to
        # its own output charset.
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        # An existing Content-Transfer-Encoding header is left untouched.
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            try:
                # NOTE(review): presumably cte is either a callable that
                # encodes the message in place or a plain string naming the
                # encoding, and calling the string raises the TypeError
                # handled below -- confirm against Charset.get_body_encoding().
                cte(self)
            except TypeError:
                # This 'if' is for backward compatibility, it allows unicode
                # through even though that won't work correctly if the
                # message is serialized.
                payload = self._payload
                if payload:
                    try:
                        payload = payload.encode('ascii', 'surrogateescape')
                    except UnicodeError:
                        payload = payload.encode(charset.output_charset)
                self._payload = charset.body_encode(payload)
                self.add_header('Content-Transfer-Encoding', cte)
369
370    def get_charset(self):
371        """Return the Charset instance associated with the message's payload.
372        """
373        return self._charset
374
375    #
376    # MAPPING INTERFACE (partial)
377    #
378    def __len__(self):
379        """Return the total number of headers, including duplicates."""
380        return len(self._headers)
381
382    def __getitem__(self, name):
383        """Get a header value.
384
385        Return None if the header is missing instead of raising an exception.
386
387        Note that if the header appeared multiple times, exactly which
388        occurrence gets returned is undefined.  Use get_all() to get all
389        the values matching a header field name.
390        """
391        return self.get(name)
392
393    def __setitem__(self, name, val):
394        """Set the value of a header.
395
396        Note: this does not overwrite an existing header with the same field
397        name.  Use __delitem__() first to delete any existing headers.
398        """
399        max_count = self.policy.header_max_count(name)
400        if max_count:
401            lname = name.lower()
402            found = 0
403            for k, v in self._headers:
404                if k.lower() == lname:
405                    found += 1
406                    if found >= max_count:
407                        raise ValueError("There may be at most {} {} headers "
408                                         "in a message".format(max_count, name))
409        self._headers.append(self.policy.header_store_parse(name, val))
410
411    def __delitem__(self, name):
412        """Delete all occurrences of a header, if present.
413
414        Does not raise an exception if the header is missing.
415        """
416        name = name.lower()
417        newheaders = []
418        for k, v in self._headers:
419            if k.lower() != name:
420                newheaders.append((k, v))
421        self._headers = newheaders
422
423    def __contains__(self, name):
424        return name.lower() in [k.lower() for k, v in self._headers]
425
426    def __iter__(self):
427        for field, value in self._headers:
428            yield field
429
430    def keys(self):
431        """Return a list of all the message's header field names.
432
433        These will be sorted in the order they appeared in the original
434        message, or were added to the message, and may contain duplicates.
435        Any fields deleted and re-inserted are always appended to the header
436        list.
437        """
438        return [k for k, v in self._headers]
439
440    def values(self):
441        """Return a list of all the message's header values.
442
443        These will be sorted in the order they appeared in the original
444        message, or were added to the message, and may contain duplicates.
445        Any fields deleted and re-inserted are always appended to the header
446        list.
447        """
448        return [self.policy.header_fetch_parse(k, v)
449                for k, v in self._headers]
450
451    def items(self):
452        """Get all the message's header fields and values.
453
454        These will be sorted in the order they appeared in the original
455        message, or were added to the message, and may contain duplicates.
456        Any fields deleted and re-inserted are always appended to the header
457        list.
458        """
459        return [(k, self.policy.header_fetch_parse(k, v))
460                for k, v in self._headers]
461
462    def get(self, name, failobj=None):
463        """Get a header value.
464
465        Like __getitem__() but return failobj instead of None when the field
466        is missing.
467        """
468        name = name.lower()
469        for k, v in self._headers:
470            if k.lower() == name:
471                return self.policy.header_fetch_parse(k, v)
472        return failobj
473
    #
    # "Internal" methods (public API, but only intended for use by a parser
    # or generator, not normal application code).
    #
478
479    def set_raw(self, name, value):
480        """Store name and value in the model without modification.
481
482        This is an "internal" API, intended only for use by a parser.
483        """
484        self._headers.append((name, value))
485
486    def raw_items(self):
487        """Return the (name, value) header pairs without modification.
488
489        This is an "internal" API, intended only for use by a generator.
490        """
491        return iter(self._headers.copy())
492
493    #
494    # Additional useful stuff
495    #
496
497    def get_all(self, name, failobj=None):
498        """Return a list of all the values for the named field.
499
500        These will be sorted in the order they appeared in the original
501        message, and may contain duplicates.  Any fields deleted and
502        re-inserted are always appended to the header list.
503
504        If no such fields exist, failobj is returned (defaults to None).
505        """
506        values = []
507        name = name.lower()
508        for k, v in self._headers:
509            if k.lower() == name:
510                values.append(self.policy.header_fetch_parse(k, v))
511        if not values:
512            return failobj
513        return values
514
515    def add_header(self, _name, _value, **_params):
516        """Extended header setting.
517
518        name is the header field to add.  keyword arguments can be used to set
519        additional parameters for the header field, with underscores converted
520        to dashes.  Normally the parameter will be added as key="value" unless
521        value is None, in which case only the key will be added.  If a
522        parameter value contains non-ASCII characters it can be specified as a
523        three-tuple of (charset, language, value), in which case it will be
524        encoded according to RFC2231 rules.  Otherwise it will be encoded using
525        the utf-8 charset and a language of ''.
526
527        Examples:
528
529        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
530        msg.add_header('content-disposition', 'attachment',
531                       filename=('utf-8', '', Fußballer.ppt'))
532        msg.add_header('content-disposition', 'attachment',
533                       filename='Fußballer.ppt'))
534        """
535        parts = []
536        for k, v in _params.items():
537            if v is None:
538                parts.append(k.replace('_', '-'))
539            else:
540                parts.append(_formatparam(k.replace('_', '-'), v))
541        if _value is not None:
542            parts.insert(0, _value)
543        self[_name] = SEMISPACE.join(parts)
544
545    def replace_header(self, _name, _value):
546        """Replace a header.
547
548        Replace the first matching header found in the message, retaining
549        header order and case.  If no matching header was found, a KeyError is
550        raised.
551        """
552        _name = _name.lower()
553        for i, (k, v) in zip(range(len(self._headers)), self._headers):
554            if k.lower() == _name:
555                self._headers[i] = self.policy.header_store_parse(k, _value)
556                break
557        else:
558            raise KeyError(_name)
559
560    #
561    # Use these three methods instead of the three above.
562    #
563
564    def get_content_type(self):
565        """Return the message's content type.
566
567        The returned string is coerced to lower case of the form
568        `maintype/subtype'.  If there was no Content-Type header in the
569        message, the default type as given by get_default_type() will be
570        returned.  Since according to RFC 2045, messages always have a default
571        type this will always return a value.
572
573        RFC 2045 defines a message's default type to be text/plain unless it
574        appears inside a multipart/digest container, in which case it would be
575        message/rfc822.
576        """
577        missing = object()
578        value = self.get('content-type', missing)
579        if value is missing:
580            # This should have no parameters
581            return self.get_default_type()
582        ctype = _splitparam(value)[0].lower()
583        # RFC 2045, section 5.2 says if its invalid, use text/plain
584        if ctype.count('/') != 1:
585            return 'text/plain'
586        return ctype
587
588    def get_content_maintype(self):
589        """Return the message's main content type.
590
591        This is the `maintype' part of the string returned by
592        get_content_type().
593        """
594        ctype = self.get_content_type()
595        return ctype.split('/')[0]
596
597    def get_content_subtype(self):
598        """Returns the message's sub-content type.
599
600        This is the `subtype' part of the string returned by
601        get_content_type().
602        """
603        ctype = self.get_content_type()
604        return ctype.split('/')[1]
605
606    def get_default_type(self):
607        """Return the `default' content type.
608
609        Most messages have a default content type of text/plain, except for
610        messages that are subparts of multipart/digest containers.  Such
611        subparts have a default content type of message/rfc822.
612        """
613        return self._default_type
614
615    def set_default_type(self, ctype):
616        """Set the `default' content type.
617
618        ctype should be either "text/plain" or "message/rfc822", although this
619        is not enforced.  The default content type is not stored in the
620        Content-Type header.
621        """
622        self._default_type = ctype
623
624    def _get_params_preserve(self, failobj, header):
625        # Like get_params() but preserves the quoting of values.  BAW:
626        # should this be part of the public interface?
627        missing = object()
628        value = self.get(header, missing)
629        if value is missing:
630            return failobj
631        params = []
632        for p in _parseparam(value):
633            try:
634                name, val = p.split('=', 1)
635                name = name.strip()
636                val = val.strip()
637            except ValueError:
638                # Must have been a bare attribute
639                name = p.strip()
640                val = ''
641            params.append((name, val))
642        params = utils.decode_params(params)
643        return params
644
645    def get_params(self, failobj=None, header='content-type', unquote=True):
646        """Return the message's Content-Type parameters, as a list.
647
648        The elements of the returned list are 2-tuples of key/value pairs, as
649        split on the `=' sign.  The left hand side of the `=' is the key,
650        while the right hand side is the value.  If there is no `=' sign in
651        the parameter the value is the empty string.  The value is as
652        described in the get_param() method.
653
654        Optional failobj is the object to return if there is no Content-Type
655        header.  Optional header is the header to search instead of
656        Content-Type.  If unquote is True, the value is unquoted.
657        """
658        missing = object()
659        params = self._get_params_preserve(missing, header)
660        if params is missing:
661            return failobj
662        if unquote:
663            return [(k, _unquotevalue(v)) for k, v in params]
664        else:
665            return params
666
667    def get_param(self, param, failobj=None, header='content-type',
668                  unquote=True):
669        """Return the parameter value if found in the Content-Type header.
670
671        Optional failobj is the object to return if there is no Content-Type
672        header, or the Content-Type header has no such parameter.  Optional
673        header is the header to search instead of Content-Type.
674
675        Parameter keys are always compared case insensitively.  The return
676        value can either be a string, or a 3-tuple if the parameter was RFC
677        2231 encoded.  When it's a 3-tuple, the elements of the value are of
678        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
679        LANGUAGE can be None, in which case you should consider VALUE to be
680        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
681        The parameter value (either the returned string, or the VALUE item in
682        the 3-tuple) is always unquoted, unless unquote is set to False.
683
684        If your application doesn't care whether the parameter was RFC 2231
685        encoded, it can turn the return value into a string as follows:
686
687            rawparam = msg.get_param('foo')
688            param = email.utils.collapse_rfc2231_value(rawparam)
689
690        """
691        if header not in self:
692            return failobj
693        for k, v in self._get_params_preserve(failobj, header):
694            if k.lower() == param.lower():
695                if unquote:
696                    return _unquotevalue(v)
697                else:
698                    return v
699        return failobj
700
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language='', replace=False):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.

        If replace is true, the rewritten header is stored through
        replace_header(); otherwise the existing header is deleted and the
        new value is set afterwards, which appends it to the header list.
        """
        # Bundle a plain value with charset/language so that _formatparam()
        # applies RFC 2231 encoding.
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        # NOTE(review): this is a truthiness test, so an existing parameter
        # whose value is empty is treated like a missing one and the new
        # parameter is appended -- confirm this is intended.
        if not self.get_param(param, header=header):
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # The parameter exists: rebuild the whole header value piece by
            # piece, substituting the new value for the matching parameter.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                append_param = ''
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        # Only touch the stored header when its value actually changed.
        if ctype != self.get(header):
            if replace:
                self.replace_header(header, ctype)
            else:
                del self[header]
                self[header] = ctype
751
752    def del_param(self, param, header='content-type', requote=True):
753        """Remove the given parameter completely from the Content-Type header.
754
755        The header will be re-written in place without the parameter or its
756        value. All values will be quoted as necessary unless requote is
757        False.  Optional header specifies an alternative to the Content-Type
758        header.
759        """
760        if header not in self:
761            return
762        new_ctype = ''
763        for p, v in self.get_params(header=header, unquote=requote):
764            if p.lower() != param.lower():
765                if not new_ctype:
766                    new_ctype = _formatparam(p, v, requote)
767                else:
768                    new_ctype = SEMISPACE.join([new_ctype,
769                                                _formatparam(p, v, requote)])
770        if new_ctype != self.get(header):
771            del self[header]
772            self[header] = new_ctype
773
774    def set_type(self, type, header='Content-Type', requote=True):
775        """Set the main type and subtype for the Content-Type header.
776
777        type must be a string in the form "maintype/subtype", otherwise a
778        ValueError is raised.
779
780        This method replaces the Content-Type header, keeping all the
781        parameters in place.  If requote is False, this leaves the existing
782        header's quoting as is.  Otherwise, the parameters will be quoted (the
783        default).
784
785        An alternative header can be specified in the header argument.  When
786        the Content-Type header is set, we'll always also add a MIME-Version
787        header.
788        """
789        # BAW: should we be strict?
790        if not type.count('/') == 1:
791            raise ValueError
792        # Set the Content-Type, you get a MIME-Version
793        if header.lower() == 'content-type':
794            del self['mime-version']
795            self['MIME-Version'] = '1.0'
796        if header not in self:
797            self[header] = type
798            return
799        params = self.get_params(header=header, unquote=requote)
800        del self[header]
801        self[header] = type
802        # Skip the first param; it's the old type.
803        for p, v in params[1:]:
804            self.set_param(p, v, header, requote)
805
806    def get_filename(self, failobj=None):
807        """Return the filename associated with the payload if present.
808
809        The filename is extracted from the Content-Disposition header's
810        `filename' parameter, and it is unquoted.  If that header is missing
811        the `filename' parameter, this method falls back to looking for the
812        `name' parameter.
813        """
814        missing = object()
815        filename = self.get_param('filename', missing, 'content-disposition')
816        if filename is missing:
817            filename = self.get_param('name', missing, 'content-type')
818        if filename is missing:
819            return failobj
820        return utils.collapse_rfc2231_value(filename).strip()
821
822    def get_boundary(self, failobj=None):
823        """Return the boundary associated with the payload if present.
824
825        The boundary is extracted from the Content-Type header's `boundary'
826        parameter, and it is unquoted.
827        """
828        missing = object()
829        boundary = self.get_param('boundary', missing)
830        if boundary is missing:
831            return failobj
832        # RFC 2046 says that boundaries may begin but not end in w/s
833        return utils.collapse_rfc2231_value(boundary).rstrip()
834
835    def set_boundary(self, boundary):
836        """Set the boundary parameter in Content-Type to 'boundary'.
837
838        This is subtly different than deleting the Content-Type header and
839        adding a new one with a new boundary parameter via add_header().  The
840        main difference is that using the set_boundary() method preserves the
841        order of the Content-Type header in the original message.
842
843        HeaderParseError is raised if the message has no Content-Type header.
844        """
845        missing = object()
846        params = self._get_params_preserve(missing, 'content-type')
847        if params is missing:
848            # There was no Content-Type header, and we don't know what type
849            # to set it to, so raise an exception.
850            raise errors.HeaderParseError('No Content-Type header found')
851        newparams = []
852        foundp = False
853        for pk, pv in params:
854            if pk.lower() == 'boundary':
855                newparams.append(('boundary', '"%s"' % boundary))
856                foundp = True
857            else:
858                newparams.append((pk, pv))
859        if not foundp:
860            # The original Content-Type header had no boundary attribute.
861            # Tack one on the end.  BAW: should we raise an exception
862            # instead???
863            newparams.append(('boundary', '"%s"' % boundary))
864        # Replace the existing Content-Type header with the new value
865        newheaders = []
866        for h, v in self._headers:
867            if h.lower() == 'content-type':
868                parts = []
869                for k, v in newparams:
870                    if v == '':
871                        parts.append(k)
872                    else:
873                        parts.append('%s=%s' % (k, v))
874                val = SEMISPACE.join(parts)
875                newheaders.append(self.policy.header_store_parse(h, val))
876
877            else:
878                newheaders.append((h, v))
879        self._headers = newheaders
880
881    def get_content_charset(self, failobj=None):
882        """Return the charset parameter of the Content-Type header.
883
884        The returned string is always coerced to lower case.  If there is no
885        Content-Type header, or if that header has no charset parameter,
886        failobj is returned.
887        """
888        missing = object()
889        charset = self.get_param('charset', missing)
890        if charset is missing:
891            return failobj
892        if isinstance(charset, tuple):
893            # RFC 2231 encoded, so decode it, and it better end up as ascii.
894            pcharset = charset[0] or 'us-ascii'
895            try:
896                # LookupError will be raised if the charset isn't known to
897                # Python.  UnicodeError will be raised if the encoded text
898                # contains a character not in the charset.
899                as_bytes = charset[2].encode('raw-unicode-escape')
900                charset = str(as_bytes, pcharset)
901            except (LookupError, UnicodeError):
902                charset = charset[2]
903        # charset characters must be in us-ascii range
904        try:
905            charset.encode('us-ascii')
906        except UnicodeError:
907            return failobj
908        # RFC 2046, $4.1.2 says charsets are not case sensitive
909        return charset.lower()
910
911    def get_charsets(self, failobj=None):
912        """Return a list containing the charset(s) used in this message.
913
914        The returned list of items describes the Content-Type headers'
915        charset parameter for this message and all the subparts in its
916        payload.
917
918        Each item will either be a string (the value of the charset parameter
919        in the Content-Type header of that part) or the value of the
920        'failobj' parameter (defaults to None), if the part does not have a
921        main MIME type of "text", or the charset is not defined.
922
923        The list will contain one string for each part of the message, plus
924        one for the container message (i.e. self), so that a non-multipart
925        message will still return a list of length 1.
926        """
927        return [part.get_content_charset(failobj) for part in self.walk()]
928
929    def get_content_disposition(self):
930        """Return the message's content-disposition if it exists, or None.
931
932        The return values can be either 'inline', 'attachment' or None
933        according to the rfc2183.
934        """
935        value = self.get('content-disposition')
936        if value is None:
937            return None
938        c_d = _splitparam(value)[0].lower()
939        return c_d
940
941    # I.e. def walk(self): ...
942    from email.iterators import walk
943
944
class MIMEPart(Message):
    """Message subclass with content-oriented helpers for one MIME part.

    Adds body discovery, attachment and sub-part iteration, conversion to
    multipart containers, and content access through the policy's content
    manager.  email.policy.default is used when no policy is supplied.
    """

    def __init__(self, policy=None):
        """Create the part, falling back to email.policy.default."""
        if policy is None:
            from email.policy import default
            policy = default
        super().__init__(policy)

    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        return super().as_string(unixfrom, maxheaderlen, policy)

    def __str__(self):
        """Return as_string() rendered with a utf8=True clone of the policy."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if the Content-Disposition header says 'attachment'.

        Returns False when the header is missing.  The check reads the
        content_disposition attribute of the header value returned by get().
        """
        c_d = self.get('content-disposition')
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) body candidates found under *part*.

        priority is the index of the matching entry in preferencelist.
        Attachments are never candidates.  Text parts match on their
        subtype; multipart/related matches on 'related' and is then searched
        through its root part; other multiparts are searched recursively.
        """
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        # Test the part being examined, not self: a nested part may claim to
        # be multipart while carrying a non-list payload, and must not be
        # descended into.
        if maintype != 'multipart' or not part.is_multipart():
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # Locate the root of the multipart/related: the subpart whose
        # Content-ID equals the 'start' parameter, else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the first preference.
                    break
        return body

    # Content types that iter_attachments() treats as body candidates.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart, or to a multipart that has no subparts.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to `multipart/*`
        # but still have single part body, in which case payload.copy() can
        # fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if maintype == 'multipart' and subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slice rather than pop(0) so an empty multipart/related yields
            # nothing instead of raising IndexError.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart.
        """
        if self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return the result of content_manager.get_content() for this part.

        The policy's content_manager is used when none is given; extra
        positional and keyword arguments are forwarded unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Call content_manager.set_content() on this part.

        The policy's content_manager is used when none is given; extra
        positional and keyword arguments are forwarded unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        Raises ValueError if the part is already a multipart whose subtype
        is *subtype* or one of disallowed_subtypes.  Existing Content-*
        headers and the payload move into a new first subpart; all other
        headers stay on the container.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related (not from alternative or mixed)."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative (not from mixed)."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, converting to multipart/<_subtype> first.

        The new part is created with this part's class and policy and filled
        via set_content(*args, **kw).  If _disp is given and the new part has
        no Content-Disposition header, _disp becomes its value.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a multipart/related subpart, defaulting to 'inline'."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a multipart/alternative subpart."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a multipart/mixed subpart, defaulting to 'attachment'."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts 'content-'."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1166
1167
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content() also ensures a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content, then add 'MIME-Version: 1.0' if it is missing.

        All arguments are forwarded to MIMEPart.set_content().
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'
1174