• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3
4"""Classes to generate plain text from a message object tree."""
5
# Names exported by this module for `from ... import *'.
__all__ = ['Generator', 'DecodedGenerator']
7
8import re
9import sys
10import time
11import random
12import warnings
13
14from cStringIO import StringIO
15from email.header import Header
16
# Joiner used by Generator._dispatch() to build a handler method name from a
# MIME main type and subtype.
UNDERSCORE = '_'
# Line separator used when joining flattened text in this module.
NL = '\n'

# Matches `From ' at the start of any line (MULTILINE).  Used to locate the
# lines that get a `>' prepended when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
21
22def _is8bitstring(s):
23    if isinstance(s, str):
24        try:
25            unicode(s, 'us-ascii')
26        except UnicodeError:
27            return True
28    return False
29
30
31
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.  This covers text payloads as well as the preamble and epilogue
        of multipart messages.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # No stored delimiter: craft one from the current time.
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options.

        The clone is an instance of the same class, writing to fp, created
        with this generator's mangle_from_ and maxheaderlen settings.
        """
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg: body is generated first, then headers, then the body
        text is emitted after the headers."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific available handler for msg's content type."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not valid in method names, so map them to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg followed by a blank separator line.

        Header values are wrapped according to self._maxheaderlen, except
        when wrapping is disabled (zero) or the value is an 8bit byte string.
        """
        for h, v in msg.items():
            # The trailing comma suppresses the newline so that the value
            # printed next lands on the same line as the header name.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.  Note that this is
                # fundamentally broken though because we lose idempotency when
                # the header string is continued with tabs.  It will now be
                # continued with spaces.  This was reversedly broken before we
                # fixed bug 1974.  Either way, we lose.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen, header_name=h).encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, escaping From_ lines if enabled.

        Nothing is written when the payload is None.  Raises TypeError when
        the payload is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart message: preamble, delimited subparts, epilogue.

        If msg has no boundary, one that does not occur in the flattened
        subparts is created and set on msg.  When From_ mangling is enabled,
        string preambles and epilogues are escaped just like text payloads.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            # Flatten each subpart into its own buffer using a generator with
            # the same options as this one.
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        preamble = msg.preamble
        if preamble is not None:
            # The preamble is free text like a body, so a line starting with
            # `From ' needs the same escaping.  Non-string values are left
            # alone and printed as before.
            if self._mangle_from_ and isinstance(preamble, basestring):
                preamble = fcre.sub('>From ', preamble)
            print >> self._fp, preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        epilogue = msg.epilogue
        if epilogue is not None:
            # Same From_ escaping as for the preamble.
            if self._mangle_from_ and isinstance(epilogue, basestring):
                epilogue = fcre.sub('>From ', epilogue)
            print >> self._fp
            self._fp.write(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        """Write each payload part of a delivery status message as a block,
        with the blocks joined by a single newline."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the payload of a message/* part.

        A list payload has its first element flattened and written; any other
        payload is written as is.
        """
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False)
            payload = s.getvalue()
        self._fp.write(payload)
284
285
286
# Default format string that DecodedGenerator uses in place of a non-text
# part when no fmt argument is supplied.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
288
class DecodedGenerator(Generator):
    """Generates a text-only rendering of a message.

    Works like the Generator base class, but every part that is neither
    `text' nor `multipart' is replaced by an expanded format string that
    describes the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), with one extra optional argument.

        All subparts of a message are walked.  For a subpart of main type
        `text', the decoded payload of the subpart is printed.

        For any other non-multipart subpart, fmt is a format string that is
        printed in place of the payload.  It is expanded with these keywords
        (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        fmt defaults to None, which selects

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        # Every part yielded by msg.walk() is handled here, so no per-type
        # handler methods are involved.
        for subpart in msg.walk():
            kind = subpart.get_content_maintype()
            if kind == 'multipart':
                # Containers produce no output of their own.
                continue
            if kind == 'text':
                print >> self, subpart.get_payload(decode=True)
                continue
            # Anything else is summarized through the format string.
            fields = {
                'type'       : subpart.get_content_type(),
                'maintype'   : subpart.get_content_maintype(),
                'subtype'    : subpart.get_content_subtype(),
                'filename'   : subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding'   : subpart.get('Content-Transfer-Encoding',
                                           '[no encoding]'),
                }
            print >> self, self._fmt % fields
342
343
344
# Helper
# _width is the number of characters in repr(sys.maxint-1), and _fmt is the
# matching zero-padded decimal format (for example '%019d' when _width is 19).
# _make_boundary() uses _fmt to render its random token at a fixed width.
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width
348
def _make_boundary(text=None):
    """Return a randomly generated boundary string.

    When text is given, the returned boundary is guaranteed not to occur in
    text as a delimiter line, i.e. a line consisting of `--' + boundary with
    an optional trailing `--'.
    """
    base = ('=' * 15) + (_fmt % random.randrange(sys.maxint)) + '=='
    if text is None:
        return base
    candidate = base
    suffix = 0
    # On a clash, retry with `.0', `.1', ... appended to the random base
    # until the candidate no longer matches any line of the text.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = base + '.' + str(suffix)
        suffix += 1
    return candidate
365