• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3# email package unit tests
4
5import re
6import time
7import base64
8import unittest
9import textwrap
10
11from io import StringIO, BytesIO
12from itertools import chain
13from random import choice
14from threading import Thread
15from unittest.mock import patch
16
17import email
18import email.policy
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator, BytesGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email.mime.nonmultipart import MIMENonMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
40from test.support import threading_helper
41from test.support.os_helper import unlink
42from test.test_email import openfile, TestEmailBase
43
44# These imports are documented to work, but we are testing them using a
45# different path, so we import them here just to make sure they are importable.
46from email.parser import FeedParser, BytesFeedParser
47
48NL = '\n'
49EMPTYSTRING = ''
50SPACE = ' '
51
52
53# Test various aspects of the Message class's API
54class TestMessageAPI(TestEmailBase):
55    def test_get_all(self):
56        eq = self.assertEqual
57        msg = self._msgobj('msg_20.txt')
58        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
59        eq(msg.get_all('xx', 'n/a'), 'n/a')
60
61    def test_getset_charset(self):
62        eq = self.assertEqual
63        msg = Message()
64        eq(msg.get_charset(), None)
65        charset = Charset('iso-8859-1')
66        msg.set_charset(charset)
67        eq(msg['mime-version'], '1.0')
68        eq(msg.get_content_type(), 'text/plain')
69        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
70        eq(msg.get_param('charset'), 'iso-8859-1')
71        eq(msg['content-transfer-encoding'], 'quoted-printable')
72        eq(msg.get_charset().input_charset, 'iso-8859-1')
73        # Remove the charset
74        msg.set_charset(None)
75        eq(msg.get_charset(), None)
76        eq(msg['content-type'], 'text/plain')
77        # Try adding a charset when there's already MIME headers present
78        msg = Message()
79        msg['MIME-Version'] = '2.0'
80        msg['Content-Type'] = 'text/x-weird'
81        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
82        msg.set_charset(charset)
83        eq(msg['mime-version'], '2.0')
84        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
85        eq(msg['content-transfer-encoding'], 'quinted-puntable')
86
87    def test_set_charset_from_string(self):
88        eq = self.assertEqual
89        msg = Message()
90        msg.set_charset('us-ascii')
91        eq(msg.get_charset().input_charset, 'us-ascii')
92        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
93
94    def test_set_payload_with_charset(self):
95        msg = Message()
96        charset = Charset('iso-8859-1')
97        msg.set_payload('This is a string payload', charset)
98        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
99
100    def test_set_payload_with_8bit_data_and_charset(self):
101        data = b'\xd0\x90\xd0\x91\xd0\x92'
102        charset = Charset('utf-8')
103        msg = Message()
104        msg.set_payload(data, charset)
105        self.assertEqual(msg['content-transfer-encoding'], 'base64')
106        self.assertEqual(msg.get_payload(decode=True), data)
107        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
108
109    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
110        data = b'\xd0\x90\xd0\x91\xd0\x92'
111        charset = Charset('utf-8')
112        charset.body_encoding = None # Disable base64 encoding
113        msg = Message()
114        msg.set_payload(data.decode('utf-8'), charset)
115        self.assertEqual(msg['content-transfer-encoding'], '8bit')
116        self.assertEqual(msg.get_payload(decode=True), data)
117
118    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
119        data = b'\xd0\x90\xd0\x91\xd0\x92'
120        charset = Charset('utf-8')
121        charset.body_encoding = None # Disable base64 encoding
122        msg = Message()
123        msg.set_payload(data, charset)
124        self.assertEqual(msg['content-transfer-encoding'], '8bit')
125        self.assertEqual(msg.get_payload(decode=True), data)
126
127    def test_set_payload_to_list(self):
128        msg = Message()
129        msg.set_payload([])
130        self.assertEqual(msg.get_payload(), [])
131
132    def test_attach_when_payload_is_string(self):
133        msg = Message()
134        msg['Content-Type'] = 'multipart/mixed'
135        msg.set_payload('string payload')
136        sub_msg = MIMEMessage(Message())
137        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
138                               msg.attach, sub_msg)
139
140    def test_get_charsets(self):
141        eq = self.assertEqual
142
143        msg = self._msgobj('msg_08.txt')
144        charsets = msg.get_charsets()
145        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
146
147        msg = self._msgobj('msg_09.txt')
148        charsets = msg.get_charsets('dingbat')
149        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
150                      'koi8-r'])
151
152        msg = self._msgobj('msg_12.txt')
153        charsets = msg.get_charsets()
154        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
155                      'iso-8859-3', 'us-ascii', 'koi8-r'])
156
157    def test_get_filename(self):
158        eq = self.assertEqual
159
160        msg = self._msgobj('msg_04.txt')
161        filenames = [p.get_filename() for p in msg.get_payload()]
162        eq(filenames, ['msg.txt', 'msg.txt'])
163
164        msg = self._msgobj('msg_07.txt')
165        subpart = msg.get_payload(1)
166        eq(subpart.get_filename(), 'dingusfish.gif')
167
168    def test_get_filename_with_name_parameter(self):
169        eq = self.assertEqual
170
171        msg = self._msgobj('msg_44.txt')
172        filenames = [p.get_filename() for p in msg.get_payload()]
173        eq(filenames, ['msg.txt', 'msg.txt'])
174
175    def test_get_boundary(self):
176        eq = self.assertEqual
177        msg = self._msgobj('msg_07.txt')
178        # No quotes!
179        eq(msg.get_boundary(), 'BOUNDARY')
180
181    def test_set_boundary(self):
182        eq = self.assertEqual
183        # This one has no existing boundary parameter, but the Content-Type:
184        # header appears fifth.
185        msg = self._msgobj('msg_01.txt')
186        msg.set_boundary('BOUNDARY')
187        header, value = msg.items()[4]
188        eq(header.lower(), 'content-type')
189        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
190        # This one has a Content-Type: header, with a boundary, stuck in the
191        # middle of its headers.  Make sure the order is preserved; it should
192        # be fifth.
193        msg = self._msgobj('msg_04.txt')
194        msg.set_boundary('BOUNDARY')
195        header, value = msg.items()[4]
196        eq(header.lower(), 'content-type')
197        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
198        # And this one has no Content-Type: header at all.
199        msg = self._msgobj('msg_03.txt')
200        self.assertRaises(errors.HeaderParseError,
201                          msg.set_boundary, 'BOUNDARY')
202
203    def test_make_boundary(self):
204        msg = MIMEMultipart('form-data')
205        # Note that when the boundary gets created is an implementation
206        # detail and might change.
207        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
208        # Trigger creation of boundary
209        msg.as_string()
210        self.assertEqual(msg.items()[0][1][:33],
211                        'multipart/form-data; boundary="==')
212        # XXX: there ought to be tests of the uniqueness of the boundary, too.
213
214    def test_message_rfc822_only(self):
215        # Issue 7970: message/rfc822 not in multipart parsed by
216        # HeaderParser caused an exception when flattened.
217        with openfile('msg_46.txt', encoding="utf-8") as fp:
218            msgdata = fp.read()
219        parser = HeaderParser()
220        msg = parser.parsestr(msgdata)
221        out = StringIO()
222        gen = Generator(out, True, 0)
223        gen.flatten(msg, False)
224        self.assertEqual(out.getvalue(), msgdata)
225
226    def test_byte_message_rfc822_only(self):
227        # Make sure new bytes header parser also passes this.
228        with openfile('msg_46.txt', encoding="utf-8") as fp:
229            msgdata = fp.read().encode('ascii')
230        parser = email.parser.BytesHeaderParser()
231        msg = parser.parsebytes(msgdata)
232        out = BytesIO()
233        gen = email.generator.BytesGenerator(out)
234        gen.flatten(msg)
235        self.assertEqual(out.getvalue(), msgdata)
236
237    def test_get_decoded_payload(self):
238        eq = self.assertEqual
239        msg = self._msgobj('msg_10.txt')
240        # The outer message is a multipart
241        eq(msg.get_payload(decode=True), None)
242        # Subpart 1 is 7bit encoded
243        eq(msg.get_payload(0).get_payload(decode=True),
244           b'This is a 7bit encoded message.\n')
245        # Subpart 2 is quopri
246        eq(msg.get_payload(1).get_payload(decode=True),
247           b'\xa1This is a Quoted Printable encoded message!\n')
248        # Subpart 3 is base64
249        eq(msg.get_payload(2).get_payload(decode=True),
250           b'This is a Base64 encoded message.')
251        # Subpart 4 is base64 with a trailing newline, which
252        # used to be stripped (issue 7143).
253        eq(msg.get_payload(3).get_payload(decode=True),
254           b'This is a Base64 encoded message.\n')
255        # Subpart 5 has no Content-Transfer-Encoding: header.
256        eq(msg.get_payload(4).get_payload(decode=True),
257           b'This has no Content-Transfer-Encoding: header.\n')
258
259    def test_get_decoded_uu_payload(self):
260        eq = self.assertEqual
261        msg = Message()
262        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
263        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
264            msg['content-transfer-encoding'] = cte
265            eq(msg.get_payload(decode=True), b'hello world')
266        # Now try some bogus data
267        msg.set_payload('foo')
268        eq(msg.get_payload(decode=True), b'foo')
269
270    def test_get_payload_n_raises_on_non_multipart(self):
271        msg = Message()
272        self.assertRaises(TypeError, msg.get_payload, 1)
273
274    def test_decoded_generator(self):
275        eq = self.assertEqual
276        msg = self._msgobj('msg_07.txt')
277        with openfile('msg_17.txt', encoding="utf-8") as fp:
278            text = fp.read()
279        s = StringIO()
280        g = DecodedGenerator(s)
281        g.flatten(msg)
282        eq(s.getvalue(), text)
283
284    def test__contains__(self):
285        msg = Message()
286        msg['From'] = 'Me'
287        msg['to'] = 'You'
288        # Check for case insensitivity
289        self.assertIn('from', msg)
290        self.assertIn('From', msg)
291        self.assertIn('FROM', msg)
292        self.assertIn('to', msg)
293        self.assertIn('To', msg)
294        self.assertIn('TO', msg)
295
296    def test_as_string(self):
297        msg = self._msgobj('msg_01.txt')
298        with openfile('msg_01.txt', encoding="utf-8") as fp:
299            text = fp.read()
300        self.assertEqual(text, str(msg))
301        fullrepr = msg.as_string(unixfrom=True)
302        lines = fullrepr.split('\n')
303        self.assertTrue(lines[0].startswith('From '))
304        self.assertEqual(text, NL.join(lines[1:]))
305
306    def test_as_string_policy(self):
307        msg = self._msgobj('msg_01.txt')
308        newpolicy = msg.policy.clone(linesep='\r\n')
309        fullrepr = msg.as_string(policy=newpolicy)
310        s = StringIO()
311        g = Generator(s, policy=newpolicy)
312        g.flatten(msg)
313        self.assertEqual(fullrepr, s.getvalue())
314
315    def test_nonascii_as_string_without_cte(self):
316        m = textwrap.dedent("""\
317            MIME-Version: 1.0
318            Content-type: text/plain; charset="iso-8859-1"
319
320            Test if non-ascii messages with no Content-Transfer-Encoding set
321            can be as_string'd:
322            Föö bär
323            """)
324        source = m.encode('iso-8859-1')
325        expected = textwrap.dedent("""\
326            MIME-Version: 1.0
327            Content-type: text/plain; charset="iso-8859-1"
328            Content-Transfer-Encoding: quoted-printable
329
330            Test if non-ascii messages with no Content-Transfer-Encoding set
331            can be as_string'd:
332            F=F6=F6 b=E4r
333            """)
334        msg = email.message_from_bytes(source)
335        self.assertEqual(msg.as_string(), expected)
336
337    def test_nonascii_as_string_without_content_type_and_cte(self):
338        m = textwrap.dedent("""\
339            MIME-Version: 1.0
340
341            Test if non-ascii messages with no Content-Type nor
342            Content-Transfer-Encoding set can be as_string'd:
343            Föö bär
344            """)
345        source = m.encode('iso-8859-1')
346        expected = source.decode('ascii', 'replace')
347        msg = email.message_from_bytes(source)
348        self.assertEqual(msg.as_string(), expected)
349
350    def test_as_bytes(self):
351        msg = self._msgobj('msg_01.txt')
352        with openfile('msg_01.txt', encoding="utf-8") as fp:
353            data = fp.read().encode('ascii')
354        self.assertEqual(data, bytes(msg))
355        fullrepr = msg.as_bytes(unixfrom=True)
356        lines = fullrepr.split(b'\n')
357        self.assertTrue(lines[0].startswith(b'From '))
358        self.assertEqual(data, b'\n'.join(lines[1:]))
359
360    def test_as_bytes_policy(self):
361        msg = self._msgobj('msg_01.txt')
362        newpolicy = msg.policy.clone(linesep='\r\n')
363        fullrepr = msg.as_bytes(policy=newpolicy)
364        s = BytesIO()
365        g = BytesGenerator(s,policy=newpolicy)
366        g.flatten(msg)
367        self.assertEqual(fullrepr, s.getvalue())
368
369    # test_headerregistry.TestContentTypeHeader.bad_params
370    def test_bad_param(self):
371        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
372        self.assertEqual(msg.get_param('baz'), '')
373
374    def test_missing_filename(self):
375        msg = email.message_from_string("From: foo\n")
376        self.assertEqual(msg.get_filename(), None)
377
378    def test_bogus_filename(self):
379        msg = email.message_from_string(
380        "Content-Disposition: blarg; filename\n")
381        self.assertEqual(msg.get_filename(), '')
382
383    def test_missing_boundary(self):
384        msg = email.message_from_string("From: foo\n")
385        self.assertEqual(msg.get_boundary(), None)
386
387    def test_get_params(self):
388        eq = self.assertEqual
389        msg = email.message_from_string(
390            'X-Header: foo=one; bar=two; baz=three\n')
391        eq(msg.get_params(header='x-header'),
392           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
393        msg = email.message_from_string(
394            'X-Header: foo; bar=one; baz=two\n')
395        eq(msg.get_params(header='x-header'),
396           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
397        eq(msg.get_params(), None)
398        msg = email.message_from_string(
399            'X-Header: foo; bar="one"; baz=two\n')
400        eq(msg.get_params(header='x-header'),
401           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
402
403    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
404    def test_get_param_liberal(self):
405        msg = Message()
406        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
407        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
408
409    def test_get_param(self):
410        eq = self.assertEqual
411        msg = email.message_from_string(
412            "X-Header: foo=one; bar=two; baz=three\n")
413        eq(msg.get_param('bar', header='x-header'), 'two')
414        eq(msg.get_param('quuz', header='x-header'), None)
415        eq(msg.get_param('quuz'), None)
416        msg = email.message_from_string(
417            'X-Header: foo; bar="one"; baz=two\n')
418        eq(msg.get_param('foo', header='x-header'), '')
419        eq(msg.get_param('bar', header='x-header'), 'one')
420        eq(msg.get_param('baz', header='x-header'), 'two')
421        # XXX: We are not RFC-2045 compliant!  We cannot parse:
422        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
423        # msg.get_param("weird")
424        # yet.
425
426    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
427    def test_get_param_funky_continuation_lines(self):
428        msg = self._msgobj('msg_22.txt')
429        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
430
431    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
432    def test_get_param_with_semis_in_quotes(self):
433        msg = email.message_from_string(
434            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
435        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
436        self.assertEqual(msg.get_param('name', unquote=False),
437                         '"Jim&amp;&amp;Jill"')
438
439    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
440    def test_get_param_with_quotes(self):
441        msg = email.message_from_string(
442            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
443        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
444        msg = email.message_from_string(
445            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
446        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
447
448    def test_field_containment(self):
449        msg = email.message_from_string('Header: exists')
450        self.assertIn('header', msg)
451        self.assertIn('Header', msg)
452        self.assertIn('HEADER', msg)
453        self.assertNotIn('headerx', msg)
454
455    def test_set_param(self):
456        eq = self.assertEqual
457        msg = Message()
458        msg.set_param('charset', 'iso-2022-jp')
459        eq(msg.get_param('charset'), 'iso-2022-jp')
460        msg.set_param('importance', 'high value')
461        eq(msg.get_param('importance'), 'high value')
462        eq(msg.get_param('importance', unquote=False), '"high value"')
463        eq(msg.get_params(), [('text/plain', ''),
464                              ('charset', 'iso-2022-jp'),
465                              ('importance', 'high value')])
466        eq(msg.get_params(unquote=False), [('text/plain', ''),
467                                       ('charset', '"iso-2022-jp"'),
468                                       ('importance', '"high value"')])
469        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
470        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
471
472    def test_del_param(self):
473        eq = self.assertEqual
474        msg = self._msgobj('msg_05.txt')
475        eq(msg.get_params(),
476           [('multipart/report', ''), ('report-type', 'delivery-status'),
477            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
478        old_val = msg.get_param("report-type")
479        msg.del_param("report-type")
480        eq(msg.get_params(),
481           [('multipart/report', ''),
482            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
483        msg.set_param("report-type", old_val)
484        eq(msg.get_params(),
485           [('multipart/report', ''),
486            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
487            ('report-type', old_val)])
488
489    def test_del_param_on_other_header(self):
490        msg = Message()
491        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
492        msg.del_param('filename', 'content-disposition')
493        self.assertEqual(msg['content-disposition'], 'attachment')
494
495    def test_del_param_on_nonexistent_header(self):
496        msg = Message()
497        # Deleting param on empty msg should not raise exception.
498        msg.del_param('filename', 'content-disposition')
499
500    def test_del_nonexistent_param(self):
501        msg = Message()
502        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
503        existing_header = msg['Content-Type']
504        msg.del_param('foobar', header='Content-Type')
505        self.assertEqual(msg['Content-Type'], existing_header)
506
507    def test_set_type(self):
508        eq = self.assertEqual
509        msg = Message()
510        self.assertRaises(ValueError, msg.set_type, 'text')
511        msg.set_type('text/plain')
512        eq(msg['content-type'], 'text/plain')
513        msg.set_param('charset', 'us-ascii')
514        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
515        msg.set_type('text/html')
516        eq(msg['content-type'], 'text/html; charset="us-ascii"')
517
518    def test_set_type_on_other_header(self):
519        msg = Message()
520        msg['X-Content-Type'] = 'text/plain'
521        msg.set_type('application/octet-stream', 'X-Content-Type')
522        self.assertEqual(msg['x-content-type'], 'application/octet-stream')
523
524    def test_get_content_type_missing(self):
525        msg = Message()
526        self.assertEqual(msg.get_content_type(), 'text/plain')
527
528    def test_get_content_type_missing_with_default_type(self):
529        msg = Message()
530        msg.set_default_type('message/rfc822')
531        self.assertEqual(msg.get_content_type(), 'message/rfc822')
532
533    def test_get_content_type_from_message_implicit(self):
534        msg = self._msgobj('msg_30.txt')
535        self.assertEqual(msg.get_payload(0).get_content_type(),
536                         'message/rfc822')
537
538    def test_get_content_type_from_message_explicit(self):
539        msg = self._msgobj('msg_28.txt')
540        self.assertEqual(msg.get_payload(0).get_content_type(),
541                         'message/rfc822')
542
543    def test_get_content_type_from_message_text_plain_implicit(self):
544        msg = self._msgobj('msg_03.txt')
545        self.assertEqual(msg.get_content_type(), 'text/plain')
546
547    def test_get_content_type_from_message_text_plain_explicit(self):
548        msg = self._msgobj('msg_01.txt')
549        self.assertEqual(msg.get_content_type(), 'text/plain')
550
551    def test_get_content_maintype_missing(self):
552        msg = Message()
553        self.assertEqual(msg.get_content_maintype(), 'text')
554
555    def test_get_content_maintype_missing_with_default_type(self):
556        msg = Message()
557        msg.set_default_type('message/rfc822')
558        self.assertEqual(msg.get_content_maintype(), 'message')
559
560    def test_get_content_maintype_from_message_implicit(self):
561        msg = self._msgobj('msg_30.txt')
562        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
563
564    def test_get_content_maintype_from_message_explicit(self):
565        msg = self._msgobj('msg_28.txt')
566        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
567
568    def test_get_content_maintype_from_message_text_plain_implicit(self):
569        msg = self._msgobj('msg_03.txt')
570        self.assertEqual(msg.get_content_maintype(), 'text')
571
572    def test_get_content_maintype_from_message_text_plain_explicit(self):
573        msg = self._msgobj('msg_01.txt')
574        self.assertEqual(msg.get_content_maintype(), 'text')
575
576    def test_get_content_subtype_missing(self):
577        msg = Message()
578        self.assertEqual(msg.get_content_subtype(), 'plain')
579
580    def test_get_content_subtype_missing_with_default_type(self):
581        msg = Message()
582        msg.set_default_type('message/rfc822')
583        self.assertEqual(msg.get_content_subtype(), 'rfc822')
584
585    def test_get_content_subtype_from_message_implicit(self):
586        msg = self._msgobj('msg_30.txt')
587        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
588
589    def test_get_content_subtype_from_message_explicit(self):
590        msg = self._msgobj('msg_28.txt')
591        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
592
593    def test_get_content_subtype_from_message_text_plain_implicit(self):
594        msg = self._msgobj('msg_03.txt')
595        self.assertEqual(msg.get_content_subtype(), 'plain')
596
597    def test_get_content_subtype_from_message_text_plain_explicit(self):
598        msg = self._msgobj('msg_01.txt')
599        self.assertEqual(msg.get_content_subtype(), 'plain')
600
601    def test_get_content_maintype_error(self):
602        msg = Message()
603        msg['Content-Type'] = 'no-slash-in-this-string'
604        self.assertEqual(msg.get_content_maintype(), 'text')
605
606    def test_get_content_subtype_error(self):
607        msg = Message()
608        msg['Content-Type'] = 'no-slash-in-this-string'
609        self.assertEqual(msg.get_content_subtype(), 'plain')
610
611    def test_replace_header(self):
612        eq = self.assertEqual
613        msg = Message()
614        msg.add_header('First', 'One')
615        msg.add_header('Second', 'Two')
616        msg.add_header('Third', 'Three')
617        eq(msg.keys(), ['First', 'Second', 'Third'])
618        eq(msg.values(), ['One', 'Two', 'Three'])
619        msg.replace_header('Second', 'Twenty')
620        eq(msg.keys(), ['First', 'Second', 'Third'])
621        eq(msg.values(), ['One', 'Twenty', 'Three'])
622        msg.add_header('First', 'Eleven')
623        msg.replace_header('First', 'One Hundred')
624        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
625        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
626        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
627
628    def test_get_content_disposition(self):
629        msg = Message()
630        self.assertIsNone(msg.get_content_disposition())
631        msg.add_header('Content-Disposition', 'attachment',
632                       filename='random.avi')
633        self.assertEqual(msg.get_content_disposition(), 'attachment')
634        msg.replace_header('Content-Disposition', 'inline')
635        self.assertEqual(msg.get_content_disposition(), 'inline')
636        msg.replace_header('Content-Disposition', 'InlinE')
637        self.assertEqual(msg.get_content_disposition(), 'inline')
638
639    # test_defect_handling:test_invalid_chars_in_base64_payload
640    def test_broken_base64_payload(self):
641        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
642        msg = Message()
643        msg['content-type'] = 'audio/x-midi'
644        msg['content-transfer-encoding'] = 'base64'
645        msg.set_payload(x)
646        self.assertEqual(msg.get_payload(decode=True),
647                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
648                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
649        self.assertIsInstance(msg.defects[0],
650                              errors.InvalidBase64CharactersDefect)
651
652    def test_broken_unicode_payload(self):
653        # This test improves coverage but is not a compliance test.
654        # The behavior in this situation is currently undefined by the API.
655        x = 'this is a br\xf6ken thing to do'
656        msg = Message()
657        msg['content-type'] = 'text/plain'
658        msg['content-transfer-encoding'] = '8bit'
659        msg.set_payload(x)
660        self.assertEqual(msg.get_payload(decode=True),
661                         bytes(x, 'raw-unicode-escape'))
662
663    def test_questionable_bytes_payload(self):
664        # This test improves coverage but is not a compliance test,
665        # since it involves poking inside the black box.
666        x = 'this is a quéstionable thing to do'.encode('utf-8')
667        msg = Message()
668        msg['content-type'] = 'text/plain; charset="utf-8"'
669        msg['content-transfer-encoding'] = '8bit'
670        msg._payload = x
671        self.assertEqual(msg.get_payload(decode=True), x)
672
673    # Issue 1078919
674    def test_ascii_add_header(self):
675        msg = Message()
676        msg.add_header('Content-Disposition', 'attachment',
677                       filename='bud.gif')
678        self.assertEqual('attachment; filename="bud.gif"',
679            msg['Content-Disposition'])
680
681    def test_noascii_add_header(self):
682        msg = Message()
683        msg.add_header('Content-Disposition', 'attachment',
684            filename="Fußballer.ppt")
685        self.assertEqual(
686            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
687            msg['Content-Disposition'])
688
689    def test_nonascii_add_header_via_triple(self):
690        msg = Message()
691        msg.add_header('Content-Disposition', 'attachment',
692            filename=('iso-8859-1', '', 'Fußballer.ppt'))
693        self.assertEqual(
694            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
695            msg['Content-Disposition'])
696
697    def test_ascii_add_header_with_tspecial(self):
698        msg = Message()
699        msg.add_header('Content-Disposition', 'attachment',
700            filename="windows [filename].ppt")
701        self.assertEqual(
702            'attachment; filename="windows [filename].ppt"',
703            msg['Content-Disposition'])
704
705    def test_nonascii_add_header_with_tspecial(self):
706        msg = Message()
707        msg.add_header('Content-Disposition', 'attachment',
708            filename="Fußballer [filename].ppt")
709        self.assertEqual(
710            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
711            msg['Content-Disposition'])
712
713    def test_binary_quopri_payload(self):
714        for charset in ('latin-1', 'ascii'):
715            msg = Message()
716            msg['content-type'] = 'text/plain; charset=%s' % charset
717            msg['content-transfer-encoding'] = 'quoted-printable'
718            msg.set_payload(b'foo=e6=96=87bar')
719            self.assertEqual(
720                msg.get_payload(decode=True),
721                b'foo\xe6\x96\x87bar',
722                'get_payload returns wrong result with charset %s.' % charset)
723
724    def test_binary_base64_payload(self):
725        for charset in ('latin-1', 'ascii'):
726            msg = Message()
727            msg['content-type'] = 'text/plain; charset=%s' % charset
728            msg['content-transfer-encoding'] = 'base64'
729            msg.set_payload(b'Zm9v5paHYmFy')
730            self.assertEqual(
731                msg.get_payload(decode=True),
732                b'foo\xe6\x96\x87bar',
733                'get_payload returns wrong result with charset %s.' % charset)
734
735    def test_binary_uuencode_payload(self):
736        for charset in ('latin-1', 'ascii'):
737            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
738                msg = Message()
739                msg['content-type'] = 'text/plain; charset=%s' % charset
740                msg['content-transfer-encoding'] = encoding
741                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
742                self.assertEqual(
743                    msg.get_payload(decode=True),
744                    b'foo\xe6\x96\x87bar',
745                    str(('get_payload returns wrong result ',
746                         'with charset {0} and encoding {1}.')).\
747                        format(charset, encoding))
748
749    def test_add_header_with_name_only_param(self):
750        msg = Message()
751        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
752        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
753
754    def test_add_header_with_no_value(self):
755        msg = Message()
756        msg.add_header('X-Status', None)
757        self.assertEqual('', msg['X-Status'])
758
759    # Issue 5871: reject an attempt to embed a header inside a header value
760    # (header injection attack).
761    def test_embedded_header_via_Header_rejected(self):
762        msg = Message()
763        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
764        self.assertRaises(errors.HeaderParseError, msg.as_string)
765
766    def test_embedded_header_via_string_rejected(self):
767        msg = Message()
768        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
769        self.assertRaises(errors.HeaderParseError, msg.as_string)
770
771    def test_unicode_header_defaults_to_utf8_encoding(self):
772        # Issue 14291
773        m = MIMEText('abc\n')
774        m['Subject'] = 'É test'
775        self.assertEqual(str(m),textwrap.dedent("""\
776            Content-Type: text/plain; charset="us-ascii"
777            MIME-Version: 1.0
778            Content-Transfer-Encoding: 7bit
779            Subject: =?utf-8?q?=C3=89_test?=
780
781            abc
782            """))
783
784    def test_unicode_body_defaults_to_utf8_encoding(self):
785        # Issue 14291
786        m = MIMEText('É testabc\n')
787        self.assertEqual(str(m),textwrap.dedent("""\
788            Content-Type: text/plain; charset="utf-8"
789            MIME-Version: 1.0
790            Content-Transfer-Encoding: base64
791
792            w4kgdGVzdGFiYwo=
793            """))
794
795
796# Test the email.encoders module
797class TestEncoders(unittest.TestCase):
798
799    def test_EncodersEncode_base64(self):
800        with openfile('PyBanner048.gif', 'rb') as fp:
801            bindata = fp.read()
802        mimed = email.mime.image.MIMEImage(bindata)
803        base64ed = mimed.get_payload()
804        # the transfer-encoded body lines should all be <=76 characters
805        lines = base64ed.split('\n')
806        self.assertLessEqual(max([ len(x) for x in lines ]), 76)
807
808    def test_encode_empty_payload(self):
809        eq = self.assertEqual
810        msg = Message()
811        msg.set_charset('us-ascii')
812        eq(msg['content-transfer-encoding'], '7bit')
813
814    def test_default_cte(self):
815        eq = self.assertEqual
816        # 7bit data and the default us-ascii _charset
817        msg = MIMEText('hello world')
818        eq(msg['content-transfer-encoding'], '7bit')
819        # Similar, but with 8bit data
820        msg = MIMEText('hello \xf8 world')
821        eq(msg['content-transfer-encoding'], 'base64')
822        # And now with a different charset
823        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
824        eq(msg['content-transfer-encoding'], 'quoted-printable')
825
826    def test_encode7or8bit(self):
827        # Make sure a charset whose input character set is 8bit but
828        # whose output character set is 7bit gets a transfer-encoding
829        # of 7bit.
830        eq = self.assertEqual
831        msg = MIMEText('文\n', _charset='euc-jp')
832        eq(msg['content-transfer-encoding'], '7bit')
833        eq(msg.as_string(), textwrap.dedent("""\
834            MIME-Version: 1.0
835            Content-Type: text/plain; charset="iso-2022-jp"
836            Content-Transfer-Encoding: 7bit
837
838            \x1b$BJ8\x1b(B
839            """))
840
841    def test_qp_encode_latin1(self):
842        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
843        self.assertEqual(str(msg), textwrap.dedent("""\
844            MIME-Version: 1.0
845            Content-Type: text/text; charset="iso-8859-1"
846            Content-Transfer-Encoding: quoted-printable
847
848            =E1=F6
849            """))
850
851    def test_qp_encode_non_latin1(self):
852        # Issue 16948
853        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
854        self.assertEqual(str(msg), textwrap.dedent("""\
855            MIME-Version: 1.0
856            Content-Type: text/text; charset="iso-8859-2"
857            Content-Transfer-Encoding: quoted-printable
858
859            =BF
860            """))
861
862
# Test long header wrapping.
#
# Each test builds a header (a Header instance, or a header set on a
# Message), encodes or flattens it, and compares the result with the exact
# expected text.  Whitespace inside the expected strings is significant.
class TestLongHeaders(TestEmailBase):

    # unittest setting: None removes the length cap on failure diffs.
    maxDiff = None

    def test_split_long_continuation(self):
        # Parse a Subject whose continuation line is far too long and has no
        # whitespace to fold at; the flattened text is expected to be
        # identical to the input.
        eq = self.ndiffAssertEqual
        msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")

    def test_another_long_almost_unsplittable_header(self):
        # The same kind of value built directly as a Header: encode() is
        # expected to return it unchanged, first with tab continuation lines
        # and then with the tabs replaced by spaces.
        eq = self.ndiffAssertEqual
        hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
        h = Header(hstr, continuation_ws='\t')
        eq(h.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
        h = Header(hstr.replace('\t', ' '))
        eq(h.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")

    def test_long_nonstring(self):
        # Three chunks in three charsets (two given as bytes, one as str)
        # are combined in one Subject header.  The folded encoded-word
        # output is checked both through a Generator and through
        # encode(maxlinelen=76).
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                  b'bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g, header_name='Subject')
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        msg = Message()
        msg['Subject'] = h
        sfp = StringIO()
        # Note: g is rebound here; above it held the iso-8859-1 Charset.
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")

    def test_long_header_encode(self):
        # Expected fold: after a "; ", continuing on a space-indented line.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
        # continuation_ws='\t' is passed, yet the expected continuation line
        # still starts with the space that is present in the source text.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation(self):
        # Here the source text itself has a tab at the fold point, and the
        # expected continuation line starts with that tab.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_header_encode_with_different_output_charset(self):
        # A header created with charset euc-jp is expected to be emitted as
        # an iso-2022-jp encoded word.
        h = Header('文', 'euc-jp')
        self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")

    def test_long_header_encode_with_different_output_charset(self):
        # Same charset conversion with a longer value: the expected result
        # spans two iso-2022-jp encoded words.
        h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
            b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
            b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
            b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
        res = """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
        self.assertEqual(h.encode(), res)

    def test_header_splitter(self):
        # The long X- header set on a MIMEText message is expected to fold
        # after a "; " when the message is flattened.
        eq = self.ndiffAssertEqual
        msg = MIMEText('')
        # It'd be great if we could use add_header() here, but that doesn't
        # guarantee an order of the parameters.
        msg['X-Foobar-Spoink-Defrobnit'] = (
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')

    def test_no_semis_header_splitter(self):
        # Ten message ids separated only by single spaces; the expected fold
        # falls between two ids.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")

    def test_last_split_chunk_does_not_fit(self):
        # The unbreakable tail is expected on a continuation line of its
        # own, even though that line is still overlong.
        eq = self.ndiffAssertEqual
        h = Header('Subject: the first part of this is short, but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_splittable_leading_char_followed_by_overlong_unsplittable(self):
        # Expected: the leading ',' stays alone on the first line and the
        # unbreakable rest moves to the next line.
        eq = self.ndiffAssertEqual
        h = Header(', but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_multiple_splittable_leading_char_followed_by_overlong_unsplittable(self):
        # As above with two leading commas; both are expected to stay on the
        # first line.
        eq = self.ndiffAssertEqual
        h = Header(', , but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_trailing_splittable_on_overlong_unsplittable(self):
        # The value contains no whitespace, so it is expected back unchanged
        # with its trailing ';' still attached.
        eq = self.ndiffAssertEqual
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
            "be_on_a_line_all_by_itself;")

    def test_trailing_splittable_on_overlong_unsplittable_with_leading_splittable(self):
        # Expected: the leading ';' alone on the first line; the trailing
        # "; " stays on the overlong second line.
        eq = self.ndiffAssertEqual
        h = Header('; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_multiple_sequential_split_chars(self):
        # The runs of two spaces must survive folding and no text may be
        # lost.
        eq = self.ndiffAssertEqual
        h = Header('This is a long line that has two whitespaces  in a row.  '
            'This used to cause truncation of the header when folded')
        eq(h.encode(), """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded""")

    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
        # ';' and ',' with no whitespace after them are not fold points: the
        # expected output is the input unchanged.
        eq = self.ndiffAssertEqual
        h = Header('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
                        "arenotlegal;fold,points")

    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
        # Expected: folds after "before;" and after ";;", with the overlong
        # last part on a line of its own.
        eq = self.ndiffAssertEqual
        h = Header('this is a  test where we need to have more than one line '
            'before; our final line that is just too big to fit;; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        eq(h.encode(), """\
this is a  test where we need to have more than one line before;
 our final line that is just too big to fit;;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")

    def test_overlong_last_part_followed_by_split_point(self):
        # The trailing space after the overlong part is expected to be kept
        # and no fold added.
        eq = self.ndiffAssertEqual
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself ')
        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
                        "should_be_on_a_line_all_by_itself ")

    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
        # Two overlong parts separated by "; ; ": the lone ';' is expected on
        # a line of its own between them.
        eq = self.ndiffAssertEqual
        h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
            'before_our_final_line_; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
        # Same shape as the previous test, but the first part contains
        # ordinary spaces.
        eq = self.ndiffAssertEqual
        h = Header('this is a test where we need to have more than one line '
            'before our final line; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
this is a test where we need to have more than one line before our final line;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_whitespace_runs(self):
        # Each id is followed by three spaces (two from the item plus the
        # join separator).  The runs are expected to survive folding, at the
        # start of continuation lines and as the two trailing spaces.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20
Test""")

    def test_long_run_with_semi_header_splitter(self):
        # Ten space-separated ids followed by "; abc"; the expected folds are
        # all at spaces between ids.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")

    def test_splitter_split_on_punctuation_only_if_fws(self):
        # Message-level variant of the punctuation test: the value has no
        # whitespace, so it is expected whole on a continuation line.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        # XXX the space after the header should not be there.
        eq(sfp.getvalue(), """\
From: test@dom.ain
References:\x20
 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points

Test""")

    def test_no_split_long_header(self):
        eq = self.ndiffAssertEqual
        hstr = 'References: ' + 'x' * 80
        h = Header(hstr)
        # These come on two lines because Headers are really field value
        # classes and don't really know about their field names.
        eq(h.encode(), """\
References:
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
        # With no whitespace at all, the 80 x's are expected back unchanged.
        h = Header('x' * 80)
        eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

    def test_splitting_multiple_long_lines(self):
        # Three identical overlong lines, the second and third introduced by
        # a tab.  Each is expected to fold after its "; " separators onto
        # space-indented lines, while the tabs that start the second and
        # third lines are kept.
        eq = self.ndiffAssertEqual
        hstr = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
        h = Header(hstr, continuation_ws='\t')
        eq(h.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")

    def test_splitting_first_line_only_is_long(self):
        # Only the first line is too long for maxlinelen=78; it is expected
        # to fold once, and the existing tab continuation lines are expected
        # unchanged.
        eq = self.ndiffAssertEqual
        hstr = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
        h = Header(hstr, maxlinelen=78, header_name='Received',
                   continuation_ws='\t')
        eq(h.encode(), """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")

    def test_long_8bit_header(self):
        # The second chunk is appended without a charset argument; the
        # expected output encodes both chunks as iso-8859-1.
        eq = self.ndiffAssertEqual
        msg = Message()
        h = Header('Britische Regierung gibt', 'iso-8859-1',
                    header_name='Subject')
        h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
        msg['Subject'] = h
        eq(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
        # maxheaderlen=0: the expected output is one unfolded encoded word.
        eq(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")

    def test_long_8bit_header_no_charset(self):
        # Non-ASCII value with no charset given: the expected encoding is
        # utf-8, whether the value is set as a str or wrapped in a Header.
        eq = self.ndiffAssertEqual
        msg = Message()
        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                         'f\xfcr Offshore-Windkraftprojekte '
                         '<a-very-long-address@example.com>')
        msg['Reply-To'] = header_string
        eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
        msg = Message()
        msg['Reply-To'] = Header(header_string,
                                 header_name='Reply-To')
        eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")

    def test_long_to_header(self):
        # Expected folds come after ", " separators; the ',' with no space
        # after it (following the first address) is not a fold point.
        eq = self.ndiffAssertEqual
        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
              '<someone@eecs.umich.edu>, '
              '"Someone Test #B" <someone@umich.edu>, '
              '"Someone Test #C" <someone@eecs.umich.edu>, '
              '"Someone Test #D" <someone@eecs.umich.edu>')
        msg = Message()
        msg['To'] = to
        eq(msg.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')

    def test_long_line_after_append(self):
        # The appended chunk is expected on a continuation line of its own.
        eq = self.ndiffAssertEqual
        s = 'This is an example of string which has almost the limit of header length.'
        h = Header(s)
        h.append('Add another line.')
        eq(h.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
 Add another line.""")

    def test_shorter_line_with_append(self):
        # The appended chunk is expected on the same line, joined by a
        # single space.
        eq = self.ndiffAssertEqual
        s = 'This is a shorter line.'
        h = Header(s)
        h.append('Add another sentence. (Surprise?)')
        eq(h.encode(),
           'This is a shorter line. Add another sentence. (Surprise?)')

    def test_long_field_name(self):
        # header_name is long; the expected first encoded word is short,
        # presumably to leave room for the field name on the first line.
        eq = self.ndiffAssertEqual
        fn = 'X-Very-Very-Very-Long-Header-Name'
        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
        h = Header(gs, 'iso-8859-1', header_name=fn)
        # BAW: this seems broken because the first line is too long
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")

    def test_long_received_header(self):
        # The same value is set once as a Header and once as a plain str;
        # both are expected to fold identically.
        h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
        msg = Message()
        msg['Received-1'] = Header(h, continuation_ws='\t')
        msg['Received-2'] = h
        # This should be splitting on spaces not semicolons.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")

    def test_string_headerinst_eq(self):
        # The same value set as a Header instance and as a plain str is
        # expected to serialize identically.
        h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
        msg = Message()
        msg['Received-1'] = Header(h, header_name='Received-1',
                                   continuation_ws='\t')
        msg['Received-2'] = h
        # XXX The space after the ':' should not be there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Received-2:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

""")

    def test_long_unbreakable_lines_with_continuation(self):
        # The same two-line value is set three ways: as a str, as a Header,
        # and as a str with a leading space.  All three are expected to
        # serialize the same way.
        eq = self.ndiffAssertEqual
        msg = Message()
        t = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        msg['Face-1'] = t
        msg['Face-2'] = Header(t, header_name='Face-2')
        msg['Face-3'] = ' ' + t
        # XXX This splitting is all wrong.  The first value line should be
        # snug against the field name, or the space after the header should
        # not be there.
        eq(msg.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")

    def test_another_long_multiline_header(self):
        # A single long Received line is parsed from text; flattening with
        # maxheaderlen=78 is expected to fold it once, at whitespace.
        eq = self.ndiffAssertEqual
        m = ('Received: from siimage.com '
             '([172.25.1.3]) by zima.siliconimage.com with '
             'Microsoft SMTPSVC(5.0.2195.4905); '
             'Wed, 16 Oct 2002 07:41:11 -0700')
        msg = email.message_from_string(m)
        eq(msg.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

''')

    def test_long_lines_with_different_header(self):
        # The header is added twice (as a str, then as a Header); both
        # copies are expected to fold the same way, keeping the run of eight
        # spaces from the source text.
        eq = self.ndiffAssertEqual
        h = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
        msg = Message()
        msg['List'] = h
        msg['List'] = Header(h, header_name='List')
        eq(msg.as_string(maxheaderlen=78), """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")

    def test_long_rfc2047_header_with_embedded_fws(self):
        # A utf-8 header whose source text already contains line breaks
        # followed by whitespace; the expected output is four encoded words,
        # one per line.
        h = Header(textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
               folding white space works"""),
                   charset='utf-8',
                   header_name='Test')
        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1433
1434
1435
1436# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # Fixture: a one-header message whose body opens with a "From "
        # line, which is what the mangle_from_ option acts on.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        # With mangle_from_=True the body's "From " line gains a '>' prefix.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # With mangle_from_=False the body is written out untouched.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # A multipart with one "From " line in the text before the first
        # boundary and one in the text after the closing boundary: both must
        # be mangled.
        parsed = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(parsed)
        mangled = [line for line in out.getvalue().split('\n')
                   if line.startswith('>From ')]
        self.assertEqual(len(mangled), 2)

    def test_mangled_from_with_bad_bytes(self):
        # Same check through the bytes API, with non-ASCII bytes on the
        # body's "From " line.
        headers = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

        """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), headers + b'>From R\xc3\xb6lli\n')

    def test_multipart_with_bad_bytes_in_cte(self):
        # bpo30835
        # There is no assertion here: the test passes as long as parsing a
        # multipart with a non-ASCII Content-Transfer-Encoding value does
        # not raise.
        raw = textwrap.dedent("""\
            From: aperson@example.com
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        email.message_from_bytes(raw)
1513
1514
1515# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Read the audiotest.au sample and wrap its bytes in a MIMEAudio
        # part without passing an explicit subtype.
        with openfile('audiotest.au', 'rb') as fp:
            raw = fp.read()
        self._audiodata = raw
        self._au = MIMEAudio(raw)

    def test_guess_minor_type(self):
        # No subtype was given in setUp; the part must report audio/basic.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is ASCII text that base64-decodes to the input bytes.
        encoded = bytes(self._au.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # A subtype passed explicitly is used as given.
        part = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(part.get_content_type(), 'audio/fish')

    def test_add_header(self):
        part = self._au
        # Unique object used as failobj so "not found" can be told apart
        # from any real value by identity.
        sentinel = []
        disposition = 'content-disposition'
        part.add_header('Content-Disposition', 'attachment',
                        filename='audiotest.au')
        self.assertEqual(part[disposition],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(part.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(part.get_param('filename', header=disposition),
                         'audiotest.au')
        self.assertEqual(part.get_param('attachment', header=disposition), '')
        self.assertIs(
            part.get_param('foo', failobj=sentinel, header=disposition),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1552
1553
1554
1555# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the PyBanner048.gif sample and wrap its bytes in a MIMEImage
        # part without passing an explicit subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            raw = fp.read()
        self._imgdata = raw
        self._im = MIMEImage(raw)

    def test_guess_minor_type(self):
        # No subtype was given in setUp; the part must report image/gif.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is ASCII text that base64-decodes to the input bytes.
        encoded = bytes(self._im.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # A subtype passed explicitly is used as given.
        part = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(part.get_content_type(), 'image/fish')

    def test_add_header(self):
        part = self._im
        # Unique object used as failobj so "not found" can be told apart
        # from any real value by identity.
        sentinel = []
        disposition = 'content-disposition'
        part.add_header('Content-Disposition', 'attachment',
                        filename='dingusfish.gif')
        self.assertEqual(part[disposition],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(part.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(part.get_param('filename', header=disposition),
                         'dingusfish.gif')
        self.assertEqual(part.get_param('attachment', header=disposition), '')
        self.assertIs(
            part.get_param('foo', failobj=sentinel, header=disposition),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1592
1593
1594
1595# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def _roundtrip(self, msg):
        # Flatten msg to bytes with BytesGenerator and parse those bytes
        # back into a new message object, which is returned.
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        return email.message_from_bytes(wire.getvalue())

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # Flattening must leave the original message as it was ...
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        # ... and the reparsed copy must present the same payload.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Original unchanged by flattening; reparsed copy agrees with it.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # Original unchanged by flattening; reparsed copy agrees with it.
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Original unchanged by flattening; reparsed copy agrees with it.
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1685
1686
1687# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # ASCII-only text part built with every optional argument defaulted.
        self._msg = MIMEText('hello there')

    def test_types(self):
        part = self._msg
        # Unique failobj, so a miss is recognised by identity.
        sentinel = []
        self.assertEqual(part.get_content_type(), 'text/plain')
        self.assertEqual(part.get_param('charset'), 'us-ascii')
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('charset', sentinel, header='foobar'),
            sentinel)

    def test_payload(self):
        part = self._msg
        self.assertEqual(part.get_payload(), 'hello there')
        self.assertFalse(part.is_multipart())

    def test_charset(self):
        # Charset given by name.
        named = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(named.get_charset().input_charset, 'us-ascii')
        self.assertEqual(named['content-type'],
                         'text/plain; charset="us-ascii"')
        # Also accept a Charset instance
        utf8 = Charset('utf-8')
        utf8.body_encoding = None
        from_instance = MIMEText('hello there', _charset=utf8)
        self.assertEqual(from_instance.get_charset().input_charset, 'utf-8')
        self.assertEqual(from_instance['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(from_instance.get_payload(), 'hello there')

    def test_7bit_input(self):
        part = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(part.get_charset().input_charset, 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        part = MIMEText('hello there')
        # The Charset object returned here is compared directly against the
        # plain string.
        self.assertEqual(part.get_charset(), 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', part.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        part = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(part.get_charset().output_charset, 'utf-8')
        self.assertEqual(part['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(part.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, cyrillic)
1744
1745
1746
1747# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a short text part
        # followed by a GIF attachment, and keep references to all three.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        # Let formatdate() render the local date and its numeric zone.  The
        # zone used to be assembled by hand from time.timezone/altzone with
        # '%04d' % (tzsecs / 36), which produced a malformed offset such as
        # " +-100" east of UTC and a wrong one for fractional-hour zones.
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The parts come back as the very objects attached in setUp.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing this text and flattening the result must give it back
        # unchanged.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--
''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble shows up as one extra blank line before
        # the first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # A '\n' epilogue shows up as one extra blank line after the closing
        # boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        # Each alternative is a message/external-body wrapping one
        # text/plain message.
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a text/html message: the boundary-looking
        # lines are expected to come back as ordinary body text.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # The four leading spaces inside the quoted boundary value are part
        # of the boundary.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="    XXXX"

--    XXXX
Content-Type: text/plain


--    XXXX
Content-Type: text/plain

--    XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), '    XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The input ends right after the closing boundary, with no final
        # newline; the part's payload must still be parsed out.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

    def test_mimebase_default_policy(self):
        m = MIMEBase('multipart', 'mixed')
        self.assertIs(m.policy, email.policy.compat32)

    def test_mimebase_custom_policy(self):
        m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
        self.assertIs(m.policy, email.policy.default)
2131
2132# Test some badly formatted messages
class TestNonConformant(TestEmailBase):

    def test_parse_missing_minor_type(self):
        # msg_14.txt must be reported as text/plain.
        parsed = self._msgobj('msg_14.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        first_part = self._msgobj('msg_15.txt').get_payload(0)
        # XXX We can probably eventually do better
        self.assertTrue(hasattr(first_part, 'defects'))
        self.assertEqual(len(first_part.defects), 1)
        self.assertIsInstance(first_part.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        parsed = self._msgobj('msg_25.txt')
        # The payload is left as one string rather than a list of parts.
        self.assertIsInstance(parsed.get_payload(), str)
        found = parsed.defects
        self.assertEqual(len(found), 2)
        self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(found[1],
                              errors.MultipartInvariantViolationDefect)

    # Template shared by the three CTE tests below: the {} placeholder sits
    # at the end of the Content-Type header so a test can splice in an extra
    # header line (or nothing) with str.format().
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        # A base64 CTE on the multipart itself is reported as one defect.
        source = self.multipart_msg.format(
            "\nContent-Transfer-Encoding: base64")
        parsed = self._str_msg(source)
        self.assertEqual(len(parsed.defects), 1)
        self.assertIsInstance(
            parsed.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        parsed = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(parsed.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # None of these CTE values may produce a defect; 'BINary' is spelled
        # in mixed case on purpose.
        for cte in ('7bit', '8bit', 'BINary'):
            extra_header = "\nContent-Transfer-Encoding: {}".format(cte)
            parsed = self._str_msg(self.multipart_msg.format(extra_header))
            self.assertEqual(len(parsed.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        message = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        message['Content-Type'] = 'text'
        self.assertEqual(message.get_content_maintype(), 'text')
        self.assertEqual(message.get_content_subtype(), 'plain')
        self.assertEqual(message.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del message['content-type']
        message['Content-Type'] = 'foo'
        self.assertEqual(message.get_content_maintype(), 'text')
        self.assertEqual(message.get_content_subtype(), 'plain')
        self.assertEqual(message.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(message)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The payload of msg_31.txt is expected to be this raw text, with
        # the boundary lines still in it.
        expected = """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
"""
        parsed = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(parsed.get_payload(), expected)

    def test_no_separating_blank_line(self):
        expected = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
"""
        parsed = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(parsed.as_string(), expected)

    # test_defect_handling
    def test_lying_multipart(self):
        parsed = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(parsed, 'defects'))
        found = parsed.defects
        self.assertEqual(len(found), 2)
        self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(found[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        rfc822_part = outer.get_payload(1)
        bad = rfc822_part.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        # The input opens with a continuation line (leading space) that has
        # no header to continue.
        source = ' Line 1\nSubject: test\n\nbody'
        parsed = email.message_from_string(source)
        self.assertEqual(parsed.keys(), ['Subject'])
        self.assertEqual(parsed.get_payload(), 'body')
        self.assertEqual(len(parsed.defects), 1)
        self.assertDefectsEqual(parsed.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        self.assertEqual(parsed.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        parsed = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(parsed.keys(), ['Subject'])
        self.assertEqual(parsed.get_payload(),
                         'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(parsed.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2305
2306
2307# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Every test hands a raw header value to decode_header() and compares
    # the returned (bytes, charset) chunks; several also rebuild a Header
    # with make_header() and check its str() and/or encode() form.

    def test_rfc2047_multiline(self):
        raw = ('Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz\n'
               ' foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
        ])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # With maxlinelen=76 the last encoded word is split over two lines.
        folded = ('Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar '
                  '=?mac-iceland?q?r=8Aksm?=\n'
                  ' =?mac-iceland?q?=9Arg=8Cs?=')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), folded)

    def test_whitespace_keeper_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b' Pirard <pirard@dom.ain>', None),
        ])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The ', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b' jumped over the ', None),
            (b'lazy dog', 'iso-8859-1'),
        ])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        expected = [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
        ]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [
            (b'Sm ', None),
            (b'\xf6', 'iso-8859-1'),
            (b' rg ', None),
            (b'\xe5', 'iso-8859-1'),
            (b' sbord', None),
        ]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = (
            ('dm==', b'v'),
            ('dm=', b'v'),
            ('dm', b'v'),
            ('dmk=', b'vi'),
            ('dmk', b'vi'),
        )
        for payload, decoded in cases:
            self.assertEqual(decode_header(template % payload),
                             [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        chunks = decode_header('=?iso-8859-1?Q?andr=e9=zz?=')
        self.assertEqual(chunks, [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        chunks = decode_header('(=?ISO-8859-1?Q?a?=)')
        self.assertEqual(
            chunks, [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        chunks = decode_header('(=?ISO-8859-1?Q?a?= b)')
        self.assertEqual(
            chunks, [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        chunks = decode_header('(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)')
        self.assertEqual(
            chunks, [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047
        chunks = decode_header('(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)')
        self.assertEqual(
            chunks, [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        chunks = decode_header(
            '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)')
        self.assertEqual(
            chunks, [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        chunks = decode_header(
            '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)')
        self.assertEqual(
            chunks, [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        chunks = decode_header('(=?ISO-8859-1?Q?a_b?=)')
        self.assertEqual(
            chunks, [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(raw), [
            (b'(', None),
            (b'a', 'iso-8859-1'),
            (b' b', 'iso-8859-2'),
            (b')', None),
        ])
        # Re-encoding reproduces the input except for letter case.
        reencoded = make_header(decode_header(raw)).encode()
        self.assertEqual(reencoded, raw.lower())
        as_text = str(make_header(decode_header(raw)))
        self.assertEqual(as_text, '(a b)')

    def test_multiline_header(self):
        raw = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(raw), [
            (b'"M\xfcller T"', 'windows-1252'),
            (b'<T.Mueller@xxx.com>', None),
        ])
        # The re-encoded form equals the input with its line break removed.
        reencoded = make_header(decode_header(raw)).encode()
        self.assertEqual(reencoded, ''.join(raw.splitlines()))
        as_text = str(make_header(decode_header(raw)))
        self.assertEqual(as_text, '"Müller T" <T.Mueller@xxx.com>')
2434
2435
2436# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Exercises MIMEMessage wrapping, parsing of message/* containers,
    # preamble/epilogue generation, default content types and the
    # MIMEMultipart constructor.

    def setUp(self):
        with openfile('msg_11.txt', encoding="utf-8") as source:
            self._text = source.read()

    def test_type_error(self):
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        payload = wrapper.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        # The single payload entry must be the very object passed in.
        only_part = payload[0]
        self.assertIs(only_part, inner)
        self.assertEqual(only_part['subject'], subject)

    def test_bad_multipart(self):
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        with self.assertRaises(errors.MultipartConversionError):
            wrapper.attach(second)

    def test_generate(self):
        # First craft the message to be encapsulated
        enclosed = Message()
        enclosed['Subject'] = 'An enclosed message'
        enclosed.set_payload('Here is the body of the message.\n')
        # Then wrap it and flatten the wrapper.
        wrapper = MIMEMessage(enclosed)
        wrapper['Subject'] = 'The enclosing message'
        buffer = StringIO()
        Generator(buffer).flatten(wrapper)
        expected = (
            'Content-Type: message/rfc822\n'
            'MIME-Version: 1.0\n'
            'Subject: The enclosing message\n'
            '\n'
            'Subject: An enclosed message\n'
            '\n'
            'Here is the body of the message.\n'
        )
        self.assertEqual(buffer.getvalue(), expected)

    def test_parse_message_rfc822(self):
        parsed = self._msgobj('msg_11.txt')
        self.assertEqual(parsed.get_content_type(), 'message/rfc822')
        payload = parsed.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed['subject'], 'An enclosed message')
        self.assertEqual(enclosed.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        check = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        report = self._msgobj('msg_16.txt')
        check(report.get_content_type(), 'multipart/report')
        self.assertTrue(report.is_multipart())
        check(len(report.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        readable = report.get_payload(0)
        check(readable.get_content_type(), 'text/plain')
        readable_text = """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

"""
        check(readable.get_payload(), readable_text)
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        status = report.get_payload(1)
        check(status.get_content_type(), 'message/delivery-status')
        check(len(status.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        first_block = status.get_payload(0)
        self.assertIsInstance(first_block, Message)
        check(first_block['original-envelope-id'],
              '0GK500B4HD0888@cougar.noc.ucla.edu')
        check(first_block.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        check(first_block.get_param('nsd', header='reporting-mta'), None)
        second_block = status.get_payload(1)
        self.assertIsInstance(second_block, Message)
        check(second_block['action'], 'failed')
        check(second_block.get_params(header='original-recipient'),
              [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        check(second_block.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        original = report.get_payload(2)
        check(original.get_content_type(), 'message/rfc822')
        payload = original.get_payload()
        self.assertIsInstance(payload, list)
        check(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        check(enclosed.get_content_type(), 'text/plain')
        check(enclosed['message-id'],
              '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Build by hand the message stored in msg_21.txt and require the
        # generated text to match the file contents.
        with openfile('msg_21.txt', encoding="utf-8") as source:
            expected = source.read()
        outer = Message()
        for name, value in (('From', 'aperson@dom.ain'),
                            ('To', 'bperson@dom.ain'),
                            ('Subject', 'Test')):
            outer[name] = value
        outer.preamble = 'MIME message'
        outer.epilogue = 'End of MIME message\n'
        parts = [MIMEText('One'), MIMEText('Two')]
        outer.add_header('Content-Type', 'multipart/mixed',
                         boundary='BOUNDARY')
        for part in parts:
            outer.attach(part)
        buffer = StringIO()
        Generator(buffer).flatten(outer)
        self.ndiffAssertEqual(buffer.getvalue(), expected)

    def test_no_nl_preamble(self):
        # Same construction as test_epilogue, but with an empty epilogue and
        # the result checked against an inline expectation.
        outer = Message()
        for name, value in (('From', 'aperson@dom.ain'),
                            ('To', 'bperson@dom.ain'),
                            ('Subject', 'Test')):
            outer[name] = value
        outer.preamble = 'MIME message'
        outer.epilogue = ''
        parts = [MIMEText('One'), MIMEText('Two')]
        outer.add_header('Content-Type', 'multipart/mixed',
                         boundary='BOUNDARY')
        for part in parts:
            outer.attach(part)
        expected = '''\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
'''
        self.ndiffAssertEqual(outer.as_string(), expected)

    def test_default_type(self):
        with openfile('msg_30.txt', encoding="utf-8") as source:
            parsed = email.message_from_file(source)
        # Both top-level parts must report message/rfc822 ...
        containers = []
        for index in (0, 1):
            container = parsed.get_payload(index)
            self.assertEqual(container.get_default_type(), 'message/rfc822')
            self.assertEqual(container.get_content_type(), 'message/rfc822')
            containers.append(container)
        # ... and the message inside each of them text/plain.
        for container in containers:
            enclosed = container.get_payload(0)
            self.assertEqual(enclosed.get_default_type(), 'text/plain')
            self.assertEqual(enclosed.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        with openfile('msg_28.txt', encoding="utf-8") as source:
            parsed = email.message_from_file(source)
        # Same expectations as test_default_type, for msg_28.txt.
        containers = []
        for index in (0, 1):
            container = parsed.get_payload(index)
            self.assertEqual(container.get_default_type(), 'message/rfc822')
            self.assertEqual(container.get_content_type(), 'message/rfc822')
            containers.append(container)
        for container in containers:
            enclosed = container.get_payload(0)
            self.assertEqual(enclosed.get_default_type(), 'text/plain')
            self.assertEqual(enclosed.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        check = self.assertEqual
        diff_check = self.ndiffAssertEqual
        # Set up container
        digest = MIMEMultipart('digest', 'BOUNDARY')
        digest.epilogue = ''
        # Set up subparts
        inner_texts = [MIMEText('message 1\n'), MIMEText('message 2\n')]
        wrapped = [MIMEMessage(inner) for inner in inner_texts]
        for part in wrapped:
            digest.attach(part)
        for part in wrapped:
            check(part.get_content_type(), 'message/rfc822')
            check(part.get_default_type(), 'message/rfc822')
        with_part_headers = '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
'''
        diff_check(digest.as_string(0), with_part_headers)
        # Strip the explicit headers from the wrappers; the reported types
        # must stay the same and the output simply loses those header lines.
        for part in wrapped:
            for header_name in ('content-type', 'mime-version'):
                del part[header_name]
        for part in wrapped:
            check(part.get_content_type(), 'message/rfc822')
            check(part.get_default_type(), 'message/rfc822')
        without_part_headers = '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
'''
        diff_check(digest.as_string(0), without_part_headers)

    def test_mime_attachments_in_constructor(self):
        parts = (MIMEText(''), MIMEText(''))
        container = MIMEMultipart(_subparts=parts)
        self.assertEqual(len(container.get_payload()), 2)
        for index, part in enumerate(parts):
            self.assertEqual(container.get_payload(index), part)

    def test_default_multipart_constructor(self):
        self.assertTrue(MIMEMultipart().is_multipart())

    def test_multipart_default_policy(self):
        # Without an explicit policy, assigning 'To' twice keeps both values.
        container = MIMEMultipart()
        for address in ('a@b.com', 'c@d.com'):
            container['To'] = address
        self.assertEqual(container.get_all('to'), ['a@b.com', 'c@d.com'])

    def test_multipart_custom_policy(self):
        # With email.policy.default a second 'To' assignment is rejected.
        container = MIMEMultipart(policy=email.policy.default)
        container['To'] = 'a@b.com'
        with self.assertRaises(ValueError) as caught:
            container['To'] = 'c@d.com'
        self.assertEqual(str(caught.exception),
                         'There may be at most 1 To headers in a message')
2745
2746
2747# Test the NonMultipart class
class TestNonMultipart(TestEmailBase):
    # A MIMENonMultipart part reports itself as non-multipart and refuses
    # attach().

    def test_nonmultipart_is_not_multipart(self):
        part = MIMENonMultipart('text', 'plain')
        self.assertFalse(part.is_multipart())

    def test_attach_raises_exception(self):
        extra = Message()
        extra['Subject'] = 'subpart 1'
        part = MIMENonMultipart('text', 'plain')
        with self.assertRaises(errors.MultipartConversionError):
            part.attach(extra)
2758
2759
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    # Line separator appended to the expected preamble, epilogue and payload
    # strings in the structural checks below.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return a (parsed message, source text) pair for the named file.
        with openfile(filename, encoding="utf-8") as source:
            raw = source.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with maxheaderlen=0 and require the output to equal
        # text.
        buffer = StringIO()
        Generator(buffer, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, buffer.getvalue())

    def test_parse_text_message(self):
        parsed, raw = self._msgobj('msg_01.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')
        self.assertEqual(parsed.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(parsed.get_param('charset'), 'us-ascii')
        self.assertEqual(parsed.preamble, None)
        self.assertEqual(parsed.epilogue, None)
        self._idempotent(parsed, raw)

    def test_parse_untyped_message(self):
        parsed, raw = self._msgobj('msg_03.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_params(), None)
        self.assertEqual(parsed.get_param('charset'), None)
        self._idempotent(parsed, raw)

    # The following tests only check the round trip for one sample file each.

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # The only round trip that also emits the Unix-From line.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        check = self.assertEqual
        parsed, raw = self._msgobj('msg_05.txt')
        check(parsed.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(parsed.get_params())
        check(params['report-type'], 'delivery-status')
        check(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        check(parsed.preamble,
              'This is a MIME-encapsulated message.' + self.linesep)
        check(parsed.epilogue, self.linesep)
        check(len(parsed.get_payload()), 3)
        # Make sure the subparts are what we expect
        for index in (0, 1):
            text_part = parsed.get_payload(index)
            check(text_part.get_content_type(), 'text/plain')
            check(text_part.get_payload(),
                  'Yadda yadda yadda' + self.linesep)
        wrapper = parsed.get_payload(2)
        check(wrapper.get_content_type(), 'message/rfc822')
        self.assertIsInstance(wrapper, Message)
        payload = wrapper.get_payload()
        self.assertIsInstance(payload, list)
        check(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        check(enclosed.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        parsed, raw = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        self.assertEqual(parsed.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = parsed.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        body = enclosed.get_payload()
        self.assertIsInstance(body, str)
        self.assertEqual(enclosed.get_payload(), self.linesep)
2920
2921
2922
2923# Test various other bits of the package's functionality
2924class TestMiscellaneous(TestEmailBase):
2925    def test_message_from_string(self):
2926        with openfile('msg_01.txt', encoding="utf-8") as fp:
2927            text = fp.read()
2928        msg = email.message_from_string(text)
2929        s = StringIO()
2930        # Don't wrap/continue long headers since we're trying to test
2931        # idempotency.
2932        g = Generator(s, maxheaderlen=0)
2933        g.flatten(msg)
2934        self.assertEqual(text, s.getvalue())
2935
2936    def test_message_from_file(self):
2937        with openfile('msg_01.txt', encoding="utf-8") as fp:
2938            text = fp.read()
2939            fp.seek(0)
2940            msg = email.message_from_file(fp)
2941            s = StringIO()
2942            # Don't wrap/continue long headers since we're trying to test
2943            # idempotency.
2944            g = Generator(s, maxheaderlen=0)
2945            g.flatten(msg)
2946            self.assertEqual(text, s.getvalue())
2947
2948    def test_message_from_string_with_class(self):
2949        with openfile('msg_01.txt', encoding="utf-8") as fp:
2950            text = fp.read()
2951
2952        # Create a subclass
2953        class MyMessage(Message):
2954            pass
2955
2956        msg = email.message_from_string(text, MyMessage)
2957        self.assertIsInstance(msg, MyMessage)
2958        # Try something more complicated
2959        with openfile('msg_02.txt', encoding="utf-8") as fp:
2960            text = fp.read()
2961        msg = email.message_from_string(text, MyMessage)
2962        for subpart in msg.walk():
2963            self.assertIsInstance(subpart, MyMessage)
2964
2965    def test_message_from_file_with_class(self):
2966        # Create a subclass
2967        class MyMessage(Message):
2968            pass
2969
2970        with openfile('msg_01.txt', encoding="utf-8") as fp:
2971            msg = email.message_from_file(fp, MyMessage)
2972        self.assertIsInstance(msg, MyMessage)
2973        # Try something more complicated
2974        with openfile('msg_02.txt', encoding="utf-8") as fp:
2975            msg = email.message_from_file(fp, MyMessage)
2976        for subpart in msg.walk():
2977            self.assertIsInstance(subpart, MyMessage)
2978
2979    def test_custom_message_does_not_require_arguments(self):
2980        class MyMessage(Message):
2981            def __init__(self):
2982                super().__init__()
2983        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2984        self.assertIsInstance(msg, MyMessage)
2985
2986    def test__all__(self):
2987        module = __import__('email')
2988        self.assertEqual(sorted(module.__all__), [
2989            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2990            'generator', 'header', 'iterators', 'message',
2991            'message_from_binary_file', 'message_from_bytes',
2992            'message_from_file', 'message_from_string', 'mime', 'parser',
2993            'quoprimime', 'utils',
2994            ])
2995
2996    def test_formatdate(self):
2997        now = time.time()
2998        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2999                         time.gmtime(now)[:6])
3000
3001    def test_formatdate_localtime(self):
3002        now = time.time()
3003        self.assertEqual(
3004            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
3005            time.localtime(now)[:6])
3006
3007    def test_formatdate_usegmt(self):
3008        now = time.time()
3009        self.assertEqual(
3010            utils.formatdate(now, localtime=False),
3011            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
3012        self.assertEqual(
3013            utils.formatdate(now, localtime=False, usegmt=True),
3014            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
3015
3016    # parsedate and parsedate_tz will become deprecated interfaces someday
3017    def test_parsedate_returns_None_for_invalid_strings(self):
3018        self.assertIsNone(utils.parsedate(''))
3019        self.assertIsNone(utils.parsedate_tz(''))
3020        self.assertIsNone(utils.parsedate(' '))
3021        self.assertIsNone(utils.parsedate_tz(' '))
3022        self.assertIsNone(utils.parsedate('0'))
3023        self.assertIsNone(utils.parsedate_tz('0'))
3024        self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
3025        self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
3026        self.assertIsNone(utils.parsedate_tz('Wed, 3 Apr 2002 12.34.56.78+0800'))
3027        # Not a part of the spec but, but this has historically worked:
3028        self.assertIsNone(utils.parsedate(None))
3029        self.assertIsNone(utils.parsedate_tz(None))
3030
3031    def test_parsedate_compact(self):
3032        # The FWS after the comma is optional
3033        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
3034                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
3035
3036    def test_parsedate_no_dayofweek(self):
3037        eq = self.assertEqual
3038        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
3039           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
3040
3041    def test_parsedate_compact_no_dayofweek(self):
3042        eq = self.assertEqual
3043        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
3044           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3045
3046    def test_parsedate_no_space_before_positive_offset(self):
3047        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
3048           (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3049
3050    def test_parsedate_no_space_before_negative_offset(self):
3051        # Issue 1155362: we already handled '+' for this case.
3052        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
3053           (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
3054
3055
3056    def test_parsedate_accepts_time_with_dots(self):
3057        eq = self.assertEqual
3058        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
3059           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3060        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
3061           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
3062
3063    def test_parsedate_acceptable_to_time_functions(self):
3064        eq = self.assertEqual
3065        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
3066        t = int(time.mktime(timetup))
3067        eq(time.localtime(t)[:6], timetup[:6])
3068        eq(int(time.strftime('%Y', timetup)), 2003)
3069        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
3070        t = int(time.mktime(timetup[:9]))
3071        eq(time.localtime(t)[:6], timetup[:6])
3072        eq(int(time.strftime('%Y', timetup[:9])), 2003)
3073
3074    def test_mktime_tz(self):
3075        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3076                                          -1, -1, -1, 0)), 0)
3077        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3078                                          -1, -1, -1, 1234)), -1234)
3079
3080    def test_parsedate_y2k(self):
3081        """Test for parsing a date with a two-digit year.
3082
3083        Parsing a date with a two-digit year should return the correct
3084        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
3085        obsoletes RFC822) requires four-digit years.
3086
3087        """
3088        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
3089                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
3090        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
3091                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
3092
3093    def test_parseaddr_empty(self):
3094        self.assertEqual(utils.parseaddr('<>'), ('', ''))
3095        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
3096
3097    def test_parseaddr_multiple_domains(self):
3098        self.assertEqual(
3099            utils.parseaddr('a@b@c'),
3100            ('', '')
3101        )
3102        self.assertEqual(
3103            utils.parseaddr('a@b.c@c'),
3104            ('', '')
3105        )
3106        self.assertEqual(
3107            utils.parseaddr('a@172.17.0.1@c'),
3108            ('', '')
3109        )
3110
3111    def test_noquote_dump(self):
3112        self.assertEqual(
3113            utils.formataddr(('A Silly Person', 'person@dom.ain')),
3114            'A Silly Person <person@dom.ain>')
3115
3116    def test_escape_dump(self):
3117        self.assertEqual(
3118            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
3119            r'"A (Very) Silly Person" <person@dom.ain>')
3120        self.assertEqual(
3121            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
3122            ('A (Very) Silly Person', 'person@dom.ain'))
3123        a = r'A \(Special\) Person'
3124        b = 'person@dom.ain'
3125        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3126
3127    def test_escape_backslashes(self):
3128        self.assertEqual(
3129            utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
3130            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
3131        a = r'Arthur \Backslash\ Foobar'
3132        b = 'person@dom.ain'
3133        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3134
3135    def test_quotes_unicode_names(self):
3136        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3137        name = "H\u00e4ns W\u00fcrst"
3138        addr = 'person@dom.ain'
3139        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3140        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
3141        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
3142        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
3143            latin1_quopri)
3144
3145    def test_accepts_any_charset_like_object(self):
3146        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3147        name = "H\u00e4ns W\u00fcrst"
3148        addr = 'person@dom.ain'
3149        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3150        foobar = "FOOBAR"
3151        class CharsetMock:
3152            def header_encode(self, string):
3153                return foobar
3154        mock = CharsetMock()
3155        mock_expected = "%s <%s>" % (foobar, addr)
3156        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
3157        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
3158            utf8_base64)
3159
3160    def test_invalid_charset_like_object_raises_error(self):
3161        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3162        name = "H\u00e4ns W\u00fcrst"
3163        addr = 'person@dom.ain'
3164        # An object without a header_encode method:
3165        bad_charset = object()
3166        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
3167            bad_charset)
3168
3169    def test_unicode_address_raises_error(self):
3170        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3171        addr = 'pers\u00f6n@dom.in'
3172        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
3173        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
3174
3175    def test_name_with_dot(self):
3176        x = 'John X. Doe <jxd@example.com>'
3177        y = '"John X. Doe" <jxd@example.com>'
3178        a, b = ('John X. Doe', 'jxd@example.com')
3179        self.assertEqual(utils.parseaddr(x), (a, b))
3180        self.assertEqual(utils.parseaddr(y), (a, b))
3181        # formataddr() quotes the name if there's a dot in it
3182        self.assertEqual(utils.formataddr((a, b)), y)
3183
3184    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3185        # issue 10005.  Note that in the third test the second pair of
3186        # backslashes is not actually a quoted pair because it is not inside a
3187        # comment or quoted string: the address being parsed has a quoted
3188        # string containing a quoted backslash, followed by 'example' and two
3189        # backslashes, followed by another quoted string containing a space and
3190        # the word 'example'.  parseaddr copies those two backslashes
3191        # literally.  Per rfc5322 this is not technically correct since a \ may
3192        # not appear in an address outside of a quoted string.  It is probably
3193        # a sensible Postel interpretation, though.
3194        eq = self.assertEqual
3195        eq(utils.parseaddr('""example" example"@example.com'),
3196          ('', '""example" example"@example.com'))
3197        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3198          ('', '"\\"example\\" example"@example.com'))
3199        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3200          ('', '"\\\\"example\\\\" example"@example.com'))
3201
3202    def test_parseaddr_preserves_spaces_in_local_part(self):
3203        # issue 9286.  A normal RFC5322 local part should not contain any
3204        # folding white space, but legacy local parts can (they are a sequence
3205        # of atoms, not dotatoms).  On the other hand we strip whitespace from
3206        # before the @ and around dots, on the assumption that the whitespace
3207        # around the punctuation is a mistake in what would otherwise be
3208        # an RFC5322 local part.  Leading whitespace is, usual, stripped as well.
3209        self.assertEqual(('', "merwok wok@xample.com"),
3210            utils.parseaddr("merwok wok@xample.com"))
3211        self.assertEqual(('', "merwok  wok@xample.com"),
3212            utils.parseaddr("merwok  wok@xample.com"))
3213        self.assertEqual(('', "merwok  wok@xample.com"),
3214            utils.parseaddr(" merwok  wok  @xample.com"))
3215        self.assertEqual(('', 'merwok"wok"  wok@xample.com'),
3216            utils.parseaddr('merwok"wok"  wok@xample.com'))
3217        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3218            utils.parseaddr('merwok. wok .  wok@xample.com'))
3219
3220    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3221        addr = ("'foo@example.com' (foo@example.com)",
3222                'foo@example.com')
3223        addrstr = ('"\'foo@example.com\' '
3224                            '(foo@example.com)" <foo@example.com>')
3225        self.assertEqual(utils.parseaddr(addrstr), addr)
3226        self.assertEqual(utils.formataddr(addr), addrstr)
3227
3228
3229    def test_multiline_from_comment(self):
3230        x = """\
3231Foo
3232\tBar <foo@example.com>"""
3233        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3234
3235    def test_quote_dump(self):
3236        self.assertEqual(
3237            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3238            r'"A Silly; Person" <person@dom.ain>')
3239
3240    def test_charset_richcomparisons(self):
3241        eq = self.assertEqual
3242        ne = self.assertNotEqual
3243        cset1 = Charset()
3244        cset2 = Charset()
3245        eq(cset1, 'us-ascii')
3246        eq(cset1, 'US-ASCII')
3247        eq(cset1, 'Us-AsCiI')
3248        eq('us-ascii', cset1)
3249        eq('US-ASCII', cset1)
3250        eq('Us-AsCiI', cset1)
3251        ne(cset1, 'usascii')
3252        ne(cset1, 'USASCII')
3253        ne(cset1, 'UsAsCiI')
3254        ne('usascii', cset1)
3255        ne('USASCII', cset1)
3256        ne('UsAsCiI', cset1)
3257        eq(cset1, cset2)
3258        eq(cset2, cset1)
3259
3260    def test_getaddresses(self):
3261        eq = self.assertEqual
3262        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3263                               'Bud Person <bperson@dom.ain>']),
3264           [('Al Person', 'aperson@dom.ain'),
3265            ('Bud Person', 'bperson@dom.ain')])
3266
3267    def test_getaddresses_nasty(self):
3268        eq = self.assertEqual
3269        eq(utils.getaddresses(['foo: ;']), [('', '')])
3270        eq(utils.getaddresses(
3271           ['[]*-- =~$']),
3272           [('', ''), ('', ''), ('', '*--')])
3273        eq(utils.getaddresses(
3274           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3275           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3276
3277    def test_getaddresses_embedded_comment(self):
3278        """Test proper handling of a nested comment"""
3279        eq = self.assertEqual
3280        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3281        eq(addrs[0][1], 'foo@bar.com')
3282
3283    def test_getaddresses_header_obj(self):
3284        """Test the handling of a Header object."""
3285        addrs = utils.getaddresses([Header('Al Person <aperson@dom.ain>')])
3286        self.assertEqual(addrs[0][1], 'aperson@dom.ain')
3287
3288    def test_make_msgid_collisions(self):
3289        # Test make_msgid uniqueness, even with multiple threads
3290        class MsgidsThread(Thread):
3291            def run(self):
3292                # generate msgids for 3 seconds
3293                self.msgids = []
3294                append = self.msgids.append
3295                make_msgid = utils.make_msgid
3296                clock = time.monotonic
3297                tfin = clock() + 3.0
3298                while clock() < tfin:
3299                    append(make_msgid(domain='testdomain-string'))
3300
3301        threads = [MsgidsThread() for i in range(5)]
3302        with threading_helper.start_threads(threads):
3303            pass
3304        all_ids = sum([t.msgids for t in threads], [])
3305        self.assertEqual(len(set(all_ids)), len(all_ids))
3306
3307    def test_utils_quote_unquote(self):
3308        eq = self.assertEqual
3309        msg = Message()
3310        msg.add_header('content-disposition', 'attachment',
3311                       filename='foo\\wacky"name')
3312        eq(msg.get_filename(), 'foo\\wacky"name')
3313
3314    def test_get_body_encoding_with_bogus_charset(self):
3315        charset = Charset('not a charset')
3316        self.assertEqual(charset.get_body_encoding(), 'base64')
3317
3318    def test_get_body_encoding_with_uppercase_charset(self):
3319        eq = self.assertEqual
3320        msg = Message()
3321        msg['Content-Type'] = 'text/plain; charset=UTF-8'
3322        eq(msg['content-type'], 'text/plain; charset=UTF-8')
3323        charsets = msg.get_charsets()
3324        eq(len(charsets), 1)
3325        eq(charsets[0], 'utf-8')
3326        charset = Charset(charsets[0])
3327        eq(charset.get_body_encoding(), 'base64')
3328        msg.set_payload(b'hello world', charset=charset)
3329        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3330        eq(msg.get_payload(decode=True), b'hello world')
3331        eq(msg['content-transfer-encoding'], 'base64')
3332        # Try another one
3333        msg = Message()
3334        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3335        charsets = msg.get_charsets()
3336        eq(len(charsets), 1)
3337        eq(charsets[0], 'us-ascii')
3338        charset = Charset(charsets[0])
3339        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3340        msg.set_payload('hello world', charset=charset)
3341        eq(msg.get_payload(), 'hello world')
3342        eq(msg['content-transfer-encoding'], '7bit')
3343
3344    def test_charsets_case_insensitive(self):
3345        lc = Charset('us-ascii')
3346        uc = Charset('US-ASCII')
3347        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3348
3349    def test_partial_falls_inside_message_delivery_status(self):
3350        eq = self.ndiffAssertEqual
3351        # The Parser interface provides chunks of data to FeedParser in 8192
3352        # byte gulps.  SF bug #1076485 found one of those chunks inside
3353        # message/delivery-status header block, which triggered an
3354        # unreadline() of NeedMoreData.
3355        msg = self._msgobj('msg_43.txt')
3356        sfp = StringIO()
3357        iterators._structure(msg, sfp)
3358        eq(sfp.getvalue(), """\
3359multipart/report
3360    text/plain
3361    message/delivery-status
3362        text/plain
3363        text/plain
3364        text/plain
3365        text/plain
3366        text/plain
3367        text/plain
3368        text/plain
3369        text/plain
3370        text/plain
3371        text/plain
3372        text/plain
3373        text/plain
3374        text/plain
3375        text/plain
3376        text/plain
3377        text/plain
3378        text/plain
3379        text/plain
3380        text/plain
3381        text/plain
3382        text/plain
3383        text/plain
3384        text/plain
3385        text/plain
3386        text/plain
3387        text/plain
3388    text/rfc822-headers
3389""")
3390
3391    def test_make_msgid_domain(self):
3392        self.assertEqual(
3393            email.utils.make_msgid(domain='testdomain-string')[-19:],
3394            '@testdomain-string>')
3395
3396    def test_make_msgid_idstring(self):
3397        self.assertEqual(
3398            email.utils.make_msgid(idstring='test-idstring',
3399                domain='testdomain-string')[-33:],
3400            '.test-idstring@testdomain-string>')
3401
3402    def test_make_msgid_default_domain(self):
3403        with patch('socket.getfqdn') as mock_getfqdn:
3404            mock_getfqdn.return_value = domain = 'pythontest.example.com'
3405            self.assertTrue(
3406                email.utils.make_msgid().endswith(
3407                    '@' + domain + '>'))
3408
3409    def test_Generator_linend(self):
3410        # Issue 14645.
3411        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f:
3412            msgtxt = f.read()
3413        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3414        msg = email.message_from_string(msgtxt)
3415        s = StringIO()
3416        g = email.generator.Generator(s)
3417        g.flatten(msg)
3418        self.assertEqual(s.getvalue(), msgtxt_nl)
3419
3420    def test_BytesGenerator_linend(self):
3421        # Issue 14645.
3422        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f:
3423            msgtxt = f.read()
3424        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3425        msg = email.message_from_string(msgtxt_nl)
3426        s = BytesIO()
3427        g = email.generator.BytesGenerator(s)
3428        g.flatten(msg, linesep='\r\n')
3429        self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3430
3431    def test_BytesGenerator_linend_with_non_ascii(self):
3432        # Issue 14645.
3433        with openfile('msg_26.txt', 'rb') as f:
3434            msgtxt = f.read()
3435        msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3436        msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3437        msg = email.message_from_bytes(msgtxt_nl)
3438        s = BytesIO()
3439        g = email.generator.BytesGenerator(s)
3440        g.flatten(msg, linesep='\r\n')
3441        self.assertEqual(s.getvalue(), msgtxt)
3442
3443    def test_mime_classes_policy_argument(self):
3444        with openfile('audiotest.au', 'rb') as fp:
3445            audiodata = fp.read()
3446        with openfile('PyBanner048.gif', 'rb') as fp:
3447            bindata = fp.read()
3448        classes = [
3449            (MIMEApplication, ('',)),
3450            (MIMEAudio, (audiodata,)),
3451            (MIMEImage, (bindata,)),
3452            (MIMEMessage, (Message(),)),
3453            (MIMENonMultipart, ('multipart', 'mixed')),
3454            (MIMEText, ('',)),
3455        ]
3456        for cls, constructor in classes:
3457            with self.subTest(cls=cls.__name__, policy='compat32'):
3458                m = cls(*constructor)
3459                self.assertIs(m.policy, email.policy.compat32)
3460            with self.subTest(cls=cls.__name__, policy='default'):
3461                m = cls(*constructor, policy=email.policy.default)
3462                self.assertIs(m.policy, email.policy.default)
3463
3464
3465# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and for FeedParser's BufferedSubFile."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields the body lines of a message."""
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        body_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(body_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(body_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        multipart = self._msgobj('msg_02.txt')
        body_lines = list(iterators.body_line_iterator(multipart))
        self.assertEqual(len(body_lines), 43)
        with openfile('msg_19.txt', encoding="utf-8") as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(body_lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator() selects subparts by main type."""
        msg = self._msgobj('msg_04.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text')
        ]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """typed_subpart_iterator() can filter on main type and subtype."""
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')
        ]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry is (text pushed, number of lines readable afterwards).
        pushes = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for pushed, expected_count in pushes:
            bsf.push(pushed)
            expected_total += expected_count
            # Drain every line available after this push.
            available = list(iter(bsf.readline, NeedMoreData))
            collected.extend(available)
            self.assertEqual(expected_count, len(available))
        self.assertEqual(len(collected), expected_total)
        self.assertEqual(''.join([pushed for pushed, _ in pushes]),
                         ''.join(collected))

    def test_push_random(self):
        """Random text pushed in small chunks is split like splitlines()."""
        from email.feedparser import BufferedSubFile, NeedMoreData

        length = 10000
        chunksize = 5
        alphabet = 'abcd \t\r\n'

        text = ''.join(choice(alphabet) for _ in range(length)) + '\n'
        expected_lines = text.splitlines(True)

        bsf = BufferedSubFile()
        got_lines = []
        for start in range(0, len(text), chunksize):
            bsf.push(text[start:start+chunksize])
            got_lines.extend(iter(bsf.readline, NeedMoreData))
        self.assertEqual(got_lines, expected_lines)
3567
3568
class TestFeedParsers(TestEmailBase):
    """Tests that feed FeedParser incrementally, chunk by chunk."""

    def parse(self, chunks):
        """Feed each item of *chunks* to a new FeedParser; return the result."""
        parser = FeedParser()
        for piece in chunks:
            parser.feed(piece)
        return parser.close()

    def test_empty_header_name_handled(self):
        """A header line with an empty name does not break parsing."""
        # Issue 19996
        # NOTE(review): a bare str is passed, so parse() feeds it one
        # character at a time -- confirm that is intended.
        parsed = self.parse("First: val\n: bad\nSecond: val")
        self.assertEqual(parsed['First'], 'val')
        self.assertEqual(parsed['Second'], 'val')

    def test_newlines(self):
        """CR, LF and CRLF end header lines, even when split across feeds."""
        key_cases = (
            (['a:\nb:\rc:\r\nd:\n'], ['a', 'b', 'c', 'd']),
            (['a:\nb:\rc:\r\nd:'], ['a', 'b', 'c', 'd']),
            (['a:\rb', 'c:\n'], ['a', 'bc']),
            (['a:\r', 'b:\n'], ['a', 'b']),
            (['a:\r', '\nb:\n'], ['a', 'b']),
        )
        for chunks, expected_keys in key_cases:
            self.assertEqual(self.parse(chunks).keys(), expected_keys)

        # Only CR and LF should break header fields
        item_cases = (
            (['a:\x85b:\u2028c:\n'], [('a', '\x85b:\u2028c:')]),
            (['a:\r', 'b:\x85', 'c:\n'], [('a', ''), ('b', '\x85c:')]),
        )
        for chunks, expected_items in item_cases:
            self.assertEqual(self.parse(chunks).items(), expected_items)

    def test_long_lines(self):
        """Very long unterminated lines fed in many chunks parse correctly."""
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        width, count = 1000, 20000
        filler = 'x' * width
        body_cases = (
            ('a:b\n\n', filler),
            ('a:b\r\r', filler),
            ('a:b\r\r', filler + '\x85'),
        )
        for header_block, piece in body_cases:
            parsed = self.parse([header_block] + [piece] * count)
            self.assertEqual(parsed.items(), [('a', 'b')])
            self.assertEqual(parsed.get_payload(), piece * count)
        # Here the long run of chunks is a header value rather than a body.
        parsed = self.parse(['a:\r', 'b: '] + [filler] * count)
        self.assertEqual(parsed.items(), [('a', ''), ('b', filler * count)])
3615
3616
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and their bytes counterparts."""

    # Show full diffs when an assertion in this class fails.
    maxDiff = None

    def test_header_parser(self):
        """HeaderParser reads the headers and keeps the body as one string."""
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', encoding="utf-8") as fp:
            parsed = HeaderParser().parse(fp)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        self.assertFalse(parsed.is_multipart())
        self.assertIsInstance(parsed.get_payload(), str)

    def test_bytes_header_parser(self):
        """BytesHeaderParser behaves like HeaderParser on binary input."""
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            parsed = email.parser.BytesHeaderParser().parse(fp)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        self.assertFalse(parsed.is_multipart())
        self.assertIsInstance(parsed.get_payload(), str)
        self.assertIsInstance(parsed.get_payload(decode=True), bytes)

    def test_bytes_parser_does_not_close_file(self):
        """BytesParser.parse() leaves the file it was given open."""
        with openfile('msg_02.txt', 'rb') as fp:
            email.parser.BytesParser().parse(fp)
            self.assertFalse(fp.closed)

    def test_bytes_parser_on_exception_does_not_close_file(self):
        """BytesParser.parse() leaves the file open even when it raises."""
        with openfile('msg_15.txt', 'rb') as fp:
            strict_parser = email.parser.BytesParser(
                policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_does_not_close_file(self):
        """Parser.parse() leaves the file it was given open."""
        with openfile('msg_02.txt', encoding="utf-8") as fp:
            email.parser.Parser().parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_on_exception_does_not_close_file(self):
        """Parser.parse() leaves the file open even when it raises."""
        with openfile('msg_15.txt', encoding="utf-8") as fp:
            strict_parser = email.parser.Parser(policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_whitespace_continuation(self):
        """A whitespace-only line continues the preceding header."""
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """A whitespace-only line also continues the final header."""
        # Like the previous test, but the subject line is the last
        # header.
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """A CRLF-delimited multipart message splits into its two parts."""
        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp:
            parsed = Parser().parse(fp)
        self.assertEqual(len(parsed.get_payload()), 2)
        text_part = parsed.get_payload(0)
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        attachment = parsed.get_payload(1)
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Flattening with linesep='\\r\\n' reproduces the CRLF source text."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp:
            source = fp.read()
        parsed = email.message_from_string(source)
        sink = StringIO()
        Generator(sink).flatten(parsed, linesep='\r\n')
        self.assertEqual(sink.getvalue(), source)

    def test_multipart_digest_with_extra_mime_headers(self):
        """A multipart/digest parses into two wrapped text/plain messages."""
        check = self.assertEqual
        with openfile('msg_28.txt', encoding="utf-8") as fp:
            digest = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        check(digest.is_multipart(), 1)
        check(len(digest.get_payload()), 2)
        # Both wrapped messages have the same shape; only the text differs.
        for index, body_text in enumerate(('message 1\n', 'message 2\n')):
            wrapper = digest.get_payload(index)
            check(wrapper.get_content_type(), 'message/rfc822')
            check(wrapper.is_multipart(), 1)
            check(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            check(inner.is_multipart(), 0)
            check(inner.get_content_type(), 'text/plain')
            self.ndiffAssertEqual(inner.get_payload(), body_text)

    def test_three_lines(self):
        """Headers with no trailing newline or body are still parsed."""
        # A bug report by Andrew McNamara
        header_lines = ['From: Andrew Person <aperson@dom.ain',
                        'Subject: Test',
                        'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        parsed = email.message_from_string(NL.join(header_lines))
        self.assertEqual(parsed['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """CRLF line endings are stripped from parsed header values."""
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        first_value = 'text'
        second_value = 'more text'
        source = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            first_value, second_value)
        parsed = email.message_from_string(source)
        self.assertEqual(parsed.get('Header'), first_value)
        self.assertEqual(parsed.get('Next-Header'), second_value)

    def test_rfc2822_header_syntax(self):
        """Header names made of unusual printable characters are accepted."""
        source = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        parsed = email.message_from_string(source)
        self.assertEqual(len(parsed), 3)
        self.assertEqual(sorted(name for name in parsed),
                         ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(parsed.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before any colon yields no headers."""
        source = ('>From foo@example.com 11:25:53\n'
                  'From: bar\n!"#QUX;~: zoo\n\nbody')
        parsed = email.message_from_string(source)
        self.assertEqual(len(parsed.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header names are accepted."""
        source = 'A: first header\nB: second header\nCC: third header\n\nbody'
        parsed = email.message_from_string(source)
        self.assertEqual(sorted(parsed.keys()), ['A', 'B', 'CC'])
        self.assertEqual(parsed.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part body ending in CRLF followed by LF keeps its CRLF."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        source = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        parsed = email.message_from_string(source)
        part_body = parsed.get_payload(0).get_payload()
        self.assertTrue(part_body.endswith('\r\n'))
3817
3818
class Test8BitBytesHandling(TestEmailBase):
    # Python 3 handles all input as str, which does not work when the actual
    # input uses an 8bit transfer encoding.  The work-around introduced in
    # email 5.1 is to decode byte streams with the surrogateescape error
    # handler and to convert back to binary at appropriate places whenever
    # surrogates are detected.  Headers containing 8bit bytes cannot be
    # transformed this way (they get munged), but they can be parsed and
    # preserved, and body parts that use an 8bit CTE can be decoded.

    maxDiff = None

    # Template for the single-part messages used by the body tests; the
    # charset, the content transfer encoding and the one body line are
    # filled in with str.format().
    bodytest_msg = (
        'From: foo@bar.com\n'
        'To: baz\n'
        'Mime-Version: 1.0\n'
        'Content-Type: text/plain; charset={charset}\n'
        'Content-Transfer-Encoding: {cte}\n'
        '\n'
        '{bodyline}\n'
    )

    def _parse_body(self, charset, cte, bodyline):
        # Fill in bodytest_msg, encode the text as utf-8 and parse the bytes.
        text = self.bodytest_msg.format(charset=charset, cte=cte,
                                        bodyline=bodyline)
        return email.message_from_bytes(text.encode('utf-8'))

    def _flatten_bytes(self, msg, **flatten_args):
        # Serialize msg with a BytesGenerator and return the bytes written.
        sink = BytesIO()
        BytesGenerator(sink).flatten(msg, **flatten_args)
        return sink.getvalue()

    def test_known_8bit_CTE(self):
        msg = self._parse_body('utf-8', '8bit', 'pöstal')
        body = "pöstal\n"
        self.assertEqual(msg.get_payload(), body)
        self.assertEqual(msg.get_payload(decode=True), body.encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        msg = self._parse_body('notavalidcharset', '8bit', 'pöstal')
        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # The data here is not RFC compliant.  Without 'decode' the library
        # decodes the body with the charset named in the headers, and since
        # the source bytes really are utf-8 that works.  Against real dirty
        # data this would likely produce mojibake, but such data is invalid
        # anyway, so this guess is as good as any.  That half of the test
        # therefore only pins down the current behavior, which is not
        # necessarily the best possible one.  With 'decode' the raw bytes
        # are returned, so that half should be a test of correct behavior,
        # or at least of the same result that email4 produced.
        msg = self._parse_body('utf-8', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # Like the previous test, but shows that an 8bit byte which cannot
        # be decoded in the declared charset is replaced by the unicode
        # 'unknown' character.  Again, this may or may not be the ideal
        # behavior.  With decode=False none of the decoders get involved,
        # so this is the only test needed for this behavior.
        msg = self._parse_body('ascii', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_8bit_in_base64_body(self):
        # 8bit bytes in a base64 body lie outside the base64 alphabet, so
        # they are ignored and the rest is decoded anyway; a defect is
        # registered on the message.
        msg = self._parse_body('utf-8', 'base64', 'cMO2c3RhbAá=')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstal'.encode('utf-8'))
        first_defect = msg.defects[0]
        self.assertIsInstance(first_defect,
                              errors.InvalidBase64CharactersDefect)

    def test_8bit_in_uuencode_body(self):
        # An 8bit byte inside a uuencode block makes it undecodable by
        # normal means, so the block comes back undecoded, but as bytes.
        msg = self._parse_body('utf-8', 'uuencode', '<,.V<W1A; á ')
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # Pairs of (raw header line, (field name, value as printed by str(msg))).
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    # The raw header lines followed directly by one body line, as utf-8.
    headertest_msg = (
        '\n'.join(src for src, _ in headertest_headers)
        + '\nYes, they are flying.\n'
    ).encode('utf-8')

    def test_get_8bit_header(self):
        msg = email.message_from_bytes(self.headertest_msg)
        expected = 'b\uFFFD\uFFFDz'
        self.assertEqual(str(msg.get('to')), expected)
        self.assertEqual(str(msg['to']), expected)

    def test_print_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        printed_values = [pair[1] for _, pair in self.headertest_headers]
        template = ('From: {}\n'
                    'To: {}\n'
                    'Subject: {}\n'
                    'From: {}\n'
                    '\n'
                    'Yes, they are flying.\n')
        self.assertEqual(str(msg), template.format(*printed_values))

    def test_values_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        values = [str(value) for value in msg.values()]
        expected = [
            'foo@bar.com',
            'b\uFFFD\uFFFDz',
            ('Maintenant je vous pr\uFFFD\uFFFDsente mon '
             'coll\uFFFD\uFFFDgue, le pouf '
             'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
             '\tJean de Baddie'),
            "g\uFFFD\uFFFDst",
        ]
        self.assertListEqual(values, expected)

    def test_items_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        items = [(str(name), str(value)) for name, value in msg.items()]
        expected = [
            ('From', 'foo@bar.com'),
            ('To', 'b\uFFFD\uFFFDz'),
            ('Subject',
             'Maintenant je vous '
             'pr\uFFFD\uFFFDsente '
             'mon coll\uFFFD\uFFFDgue, le pouf '
             'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
             '\tJean de Baddie'),
            ('From', 'g\uFFFD\uFFFDst'),
        ]
        self.assertListEqual(items, expected)

    def test_get_all_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        senders = [str(value) for value in msg.get_all('from')]
        self.assertListEqual(senders, ['foo@bar.com', 'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        raw = 'Content-Type: text/pl\xA7in; charset=utf-8\n'.encode('latin-1')
        msg = email.message_from_bytes(raw)
        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(msg.get_content_maintype(), "text")
        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
    def test_get_params_with_8bit(self):
        raw = 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')
        msg = email.message_from_bytes(raw)
        self.assertEqual(
            msg.get_params(header='x-header'),
            [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFDne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)

    # Content-Type header whose RFC 2231 encoded "title" parameter contains
    # a raw 8bit byte; shared by the three rfc2231 tests below.
    _rfc2231_8bit_source = (
        "Content-Type: text/plain; charset=us-ascii;\n"
        " title*=us-ascii'en'This%20is%20not%20f\xa7n"
    ).encode('latin-1')

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
    def test_get_rfc2231_params_with_8bit(self):
        msg = email.message_from_bytes(self._rfc2231_8bit_source)
        self.assertEqual(msg.get_param('title'),
                         ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        msg = email.message_from_bytes(self._rfc2231_8bit_source)
        msg.set_param('title', 'test')
        self.assertEqual(msg.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        msg = email.message_from_bytes(self._rfc2231_8bit_source)
        msg.del_param('title')
        self.assertEqual(msg.get_param('title'), None)
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        # The CTE header value itself contains an 8bit byte; the payload
        # is then returned as is, both with and without decode.
        raw = ('Content-Transfer-Encoding: b\xa7se64\n'
               'Content-Type: text/plain; charset=latin-1\n'
               '\n'
               'payload\n').encode('latin-1')
        msg = email.message_from_bytes(raw)
        self.assertEqual(msg.get_payload(), 'payload\n')
        self.assertEqual(msg.get_payload(decode=True), b'payload\n')

    # utf-8 encoded message with non-ascii headers and an 8bit body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        self.assertEqual(self._flatten_bytes(msg), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        # Issue 11019
        self.assertEqual(self._flatten_bytes(Message()), b"\n")

    # What the str Generator is expected to emit for non_latin_bin_msg.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        sink = StringIO()
        Generator(sink).flatten(msg)
        self.assertEqual(sink.getvalue(),
                         self.non_latin_bin_msg_as7bit_wrapped)

    def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        before = self._flatten_bytes(msg)
        Generator(StringIO()).flatten(msg)  # Should not mutate msg!
        self.assertEqual(self._flatten_bytes(msg), before)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom line contains the current date and cannot be
        # compared literally.  Only check that its first word is 'From'
        # and that the rest of the output equals the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        flat = self._flatten_bytes(msg, unixfrom=True)
        from_line, *rest = flat.split(b'\n')
        self.assertEqual(from_line.split()[0], b'From')
        self.assertEqual(b'\n'.join(rest), self.non_latin_bin_msg)

    # Same as the wrapped variant, except that the first two Subject lines
    # are replaced by one long line.
    _as7bit_lines = non_latin_bin_msg_as7bit_wrapped.split('\n')
    _as7bit_lines[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(_as7bit_lines)
    del _as7bit_lines

    def test_message_from_binary_file(self):
        path = 'test.msg'
        self.addCleanup(unlink, path)
        with open(path, 'wb') as writer:
            writer.write(self.non_latin_bin_msg)
        with open(path, 'rb') as reader:
            parsed = email.parser.BytesParser().parse(reader)
        self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)

    # latin-1 encoded message with an 8bit body.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected str() rendering of latin_bin_msg.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        sink = StringIO()
        DecodedGenerator(sink).flatten(parsed)
        # The DecodedGenerator output has one more blank line than the
        # input message.  RDM: not sure if this is a bug or not, but it is
        # not specific to the 8bit->7bit conversion.
        expected = self.latin_bin_msg.decode('latin-1') + '\n'
        self.assertEqual(sink.getvalue(), expected)

    def test_bytes_feedparser(self):
        # Feed the message to the parser in slices of ten bytes.
        chunk_size = 10
        feeder = BytesFeedParser()
        for start in range(0, len(self.latin_bin_msg), chunk_size):
            feeder.feed(self.latin_bin_msg[start:start + chunk_size])
        parsed = feeder.close()
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        with openfile('msg_26.txt', 'rb') as fp:
            original = fp.read()
        msg = email.message_from_bytes(original)
        self.assertEqual(self._flatten_bytes(msg, linesep='\r\n'), original)

    def test_8bit_multipart(self):
        # Issue 11605
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
                "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
                    <title>foodwatch - Newsletter</title>
            </head>
            <body>
              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
                 die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        msg = email.message_from_bytes(source)
        self.assertEqual(self._flatten_bytes(msg), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        msg = Message()
        # This has enough non-ascii that it should always end up b encoded.
        msg['Subject'] = Header('žluťoučký kůň')
        self.assertEqual(
            self._flatten_bytes(msg, linesep='\r\n'),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in ByteGenerator, test Generator for completeness.
        msg = Message()
        # This has enough non-ascii that it should always end up b encoded.
        msg['Subject'] = Header('žluťoučký kůň')
        sink = StringIO()
        Generator(sink).flatten(msg, linesep='\r\n')
        self.assertEqual(
            sink.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
4231
4232
class BaseTestBytesGeneratorIdempotent:
    # Mixin that parses test files as bytes and checks that flattening a
    # message with BytesGenerator gives back the bytes it was parsed from.
    # It reads self.linesep, self.blinesep and self.normalize_linesep_regex,
    # none of which it defines itself.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the file in binary mode, replace every match of the linesep
        # regex with blinesep and parse the result.  Returns the pair
        # (message, normalized bytes).
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0), using this
        # class's line separator, and require the output to equal data.
        sink = BytesIO()
        BytesGenerator(sink, maxheaderlen=0).flatten(
            msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
4249
4250
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    # Runs the idempotency tests with "\n" line endings: every "\r\n" in
    # the test data is replaced by the bytes separator before parsing.
    linesep = '\n'
    blinesep = linesep.encode('ascii')
    normalize_linesep_regex = re.compile(br'\r\n')
4256
4257
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Runs the idempotency tests with "\r\n" line endings: every "\n" that
    # is not already preceded by "\r" is replaced by the bytes separator
    # before parsing.
    linesep = '\r\n'
    blinesep = linesep.encode('ascii')
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
4263
4264
4265class TestBase64(unittest.TestCase):
4266    def test_len(self):
4267        eq = self.assertEqual
4268        eq(base64mime.header_length('hello'),
4269           len(base64mime.body_encode(b'hello', eol='')))
4270        for size in range(15):
4271            if   size == 0 : bsize = 0
4272            elif size <= 3 : bsize = 4
4273            elif size <= 6 : bsize = 8
4274            elif size <= 9 : bsize = 12
4275            elif size <= 12: bsize = 16
4276            else           : bsize = 20
4277            eq(base64mime.header_length('x' * size), bsize)
4278
4279    def test_decode(self):
4280        eq = self.assertEqual
4281        eq(base64mime.decode(''), b'')
4282        eq(base64mime.decode('aGVsbG8='), b'hello')
4283
4284    def test_encode(self):
4285        eq = self.assertEqual
4286        eq(base64mime.body_encode(b''), '')
4287        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
4288        # Test the binary flag
4289        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
4290        # Test the maxlinelen arg
4291        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
4292eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
4293eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
4294eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
4295eHh4eCB4eHh4IA==
4296""")
4297        # Test the eol argument
4298        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4299           """\
4300eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
4301eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
4302eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
4303eHh4eCB4eHh4IA==\r
4304""")
4305
4306    def test_header_encode(self):
4307        eq = self.assertEqual
4308        he = base64mime.header_encode
4309        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
4310        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
4311        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
4312        # Test the charset option
4313        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
4314        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
4315
4316
4317class TestQuopri(unittest.TestCase):
4318    def setUp(self):
4319        # Set of characters (as byte integers) that don't need to be encoded
4320        # in headers.
4321        self.hlit = list(chain(
4322            range(ord('a'), ord('z') + 1),
4323            range(ord('A'), ord('Z') + 1),
4324            range(ord('0'), ord('9') + 1),
4325            (c for c in b'!*+-/')))
4326        # Set of characters (as byte integers) that do need to be encoded in
4327        # headers.
4328        self.hnon = [c for c in range(256) if c not in self.hlit]
4329        assert len(self.hlit) + len(self.hnon) == 256
4330        # Set of characters (as byte integers) that don't need to be encoded
4331        # in bodies.
4332        self.blit = list(range(ord(' '), ord('~') + 1))
4333        self.blit.append(ord('\t'))
4334        self.blit.remove(ord('='))
4335        # Set of characters (as byte integers) that do need to be encoded in
4336        # bodies.
4337        self.bnon = [c for c in range(256) if c not in self.blit]
4338        assert len(self.blit) + len(self.bnon) == 256
4339
4340    def test_quopri_header_check(self):
4341        for c in self.hlit:
4342            self.assertFalse(quoprimime.header_check(c),
4343                        'Should not be header quopri encoded: %s' % chr(c))
4344        for c in self.hnon:
4345            self.assertTrue(quoprimime.header_check(c),
4346                            'Should be header quopri encoded: %s' % chr(c))
4347
4348    def test_quopri_body_check(self):
4349        for c in self.blit:
4350            self.assertFalse(quoprimime.body_check(c),
4351                        'Should not be body quopri encoded: %s' % chr(c))
4352        for c in self.bnon:
4353            self.assertTrue(quoprimime.body_check(c),
4354                            'Should be body quopri encoded: %s' % chr(c))
4355
4356    def test_header_quopri_len(self):
4357        eq = self.assertEqual
4358        eq(quoprimime.header_length(b'hello'), 5)
4359        # RFC 2047 chrome is not included in header_length().
4360        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
4361           quoprimime.header_length(b'hello') +
4362           # =?xxx?q?...?= means 10 extra characters
4363           10)
4364        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
4365        # RFC 2047 chrome is not included in header_length().
4366        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
4367           quoprimime.header_length(b'h@e@l@l@o@') +
4368           # =?xxx?q?...?= means 10 extra characters
4369           10)
4370        for c in self.hlit:
4371            eq(quoprimime.header_length(bytes([c])), 1,
4372               'expected length 1 for %r' % chr(c))
4373        for c in self.hnon:
4374            # Space is special; it's encoded to _
4375            if c == ord(' '):
4376                continue
4377            eq(quoprimime.header_length(bytes([c])), 3,
4378               'expected length 3 for %r' % chr(c))
4379        eq(quoprimime.header_length(b' '), 1)
4380
4381    def test_body_quopri_len(self):
4382        eq = self.assertEqual
4383        for c in self.blit:
4384            eq(quoprimime.body_length(bytes([c])), 1)
4385        for c in self.bnon:
4386            eq(quoprimime.body_length(bytes([c])), 3)
4387
4388    def test_quote_unquote_idempotent(self):
4389        for x in range(256):
4390            c = chr(x)
4391            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
4392
4393    def _test_header_encode(self, header, expected_encoded_header, charset=None):
4394        if charset is None:
4395            encoded_header = quoprimime.header_encode(header)
4396        else:
4397            encoded_header = quoprimime.header_encode(header, charset)
4398        self.assertEqual(encoded_header, expected_encoded_header)
4399
4400    def test_header_encode_null(self):
4401        self._test_header_encode(b'', '')
4402
4403    def test_header_encode_one_word(self):
4404        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
4405
4406    def test_header_encode_two_lines(self):
4407        self._test_header_encode(b'hello\nworld',
4408                                '=?iso-8859-1?q?hello=0Aworld?=')
4409
4410    def test_header_encode_non_ascii(self):
4411        self._test_header_encode(b'hello\xc7there',
4412                                '=?iso-8859-1?q?hello=C7there?=')
4413
4414    def test_header_encode_alt_charset(self):
4415        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
4416                charset='iso-8859-2')
4417
4418    def _test_header_decode(self, encoded_header, expected_decoded_header):
4419        decoded_header = quoprimime.header_decode(encoded_header)
4420        self.assertEqual(decoded_header, expected_decoded_header)
4421
4422    def test_header_decode_null(self):
4423        self._test_header_decode('', '')
4424
4425    def test_header_decode_one_word(self):
4426        self._test_header_decode('hello', 'hello')
4427
4428    def test_header_decode_two_lines(self):
4429        self._test_header_decode('hello=0Aworld', 'hello\nworld')
4430
4431    def test_header_decode_non_ascii(self):
4432        self._test_header_decode('hello=C7there', 'hello\xc7there')
4433
4434    def test_header_decode_re_bug_18380(self):
4435        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
4436        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)
4437
4438    def _test_decode(self, encoded, expected_decoded, eol=None):
4439        if eol is None:
4440            decoded = quoprimime.decode(encoded)
4441        else:
4442            decoded = quoprimime.decode(encoded, eol=eol)
4443        self.assertEqual(decoded, expected_decoded)
4444
4445    def test_decode_null_word(self):
4446        self._test_decode('', '')
4447
4448    def test_decode_null_line_null_word(self):
4449        self._test_decode('\r\n', '\n')
4450
4451    def test_decode_one_word(self):
4452        self._test_decode('hello', 'hello')
4453
4454    def test_decode_one_word_eol(self):
4455        self._test_decode('hello', 'hello', eol='X')
4456
4457    def test_decode_one_line(self):
4458        self._test_decode('hello\r\n', 'hello\n')
4459
4460    def test_decode_one_line_lf(self):
4461        self._test_decode('hello\n', 'hello\n')
4462
4463    def test_decode_one_line_cr(self):
4464        self._test_decode('hello\r', 'hello\n')
4465
4466    def test_decode_one_line_nl(self):
4467        self._test_decode('hello\n', 'helloX', eol='X')
4468
4469    def test_decode_one_line_crnl(self):
4470        self._test_decode('hello\r\n', 'helloX', eol='X')
4471
4472    def test_decode_one_line_one_word(self):
4473        self._test_decode('hello\r\nworld', 'hello\nworld')
4474
4475    def test_decode_one_line_one_word_eol(self):
4476        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
4477
4478    def test_decode_two_lines(self):
4479        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
4480
4481    def test_decode_two_lines_eol(self):
4482        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
4483
4484    def test_decode_one_long_line(self):
4485        self._test_decode('Spam' * 250, 'Spam' * 250)
4486
4487    def test_decode_one_space(self):
4488        self._test_decode(' ', '')
4489
4490    def test_decode_multiple_spaces(self):
4491        self._test_decode(' ' * 5, '')
4492
4493    def test_decode_one_line_trailing_spaces(self):
4494        self._test_decode('hello    \r\n', 'hello\n')
4495
4496    def test_decode_two_lines_trailing_spaces(self):
4497        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')
4498
4499    def test_decode_quoted_word(self):
4500        self._test_decode('=22quoted=20words=22', '"quoted words"')
4501
4502    def test_decode_uppercase_quoting(self):
4503        self._test_decode('ab=CD=EF', 'ab\xcd\xef')
4504
4505    def test_decode_lowercase_quoting(self):
4506        self._test_decode('ab=cd=ef', 'ab\xcd\xef')
4507
4508    def test_decode_soft_line_break(self):
4509        self._test_decode('soft line=\r\nbreak', 'soft linebreak')
4510
4511    def test_decode_false_quoting(self):
4512        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
4513
4514    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
4515        kwargs = {}
4516        if maxlinelen is None:
4517            # Use body_encode's default.
4518            maxlinelen = 76
4519        else:
4520            kwargs['maxlinelen'] = maxlinelen
4521        if eol is None:
4522            # Use body_encode's default.
4523            eol = '\n'
4524        else:
4525            kwargs['eol'] = eol
4526        encoded_body = quoprimime.body_encode(body, **kwargs)
4527        self.assertEqual(encoded_body, expected_encoded_body)
4528        if eol == '\n' or eol == '\r\n':
4529            # We know how to split the result back into lines, so maxlinelen
4530            # can be checked.
4531            for line in encoded_body.splitlines():
4532                self.assertLessEqual(len(line), maxlinelen)
4533
4534    def test_encode_null(self):
4535        self._test_encode('', '')
4536
4537    def test_encode_null_lines(self):
4538        self._test_encode('\n\n', '\n\n')
4539
4540    def test_encode_one_line(self):
4541        self._test_encode('hello\n', 'hello\n')
4542
4543    def test_encode_one_line_crlf(self):
4544        self._test_encode('hello\r\n', 'hello\n')
4545
4546    def test_encode_one_line_eol(self):
4547        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
4548
4549    def test_encode_one_line_eol_after_non_ascii(self):
4550        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
4551        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
4552                          'hello=CF=85\r\n', eol='\r\n')
4553
4554    def test_encode_one_space(self):
4555        self._test_encode(' ', '=20')
4556
4557    def test_encode_one_line_one_space(self):
4558        self._test_encode(' \n', '=20\n')
4559
# XXX: body_encode() expects a string, but uses ord(char) of each character
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4564
4565    def test_encode_two_lines_one_space(self):
4566        self._test_encode(' \n \n', '=20\n=20\n')
4567
4568    def test_encode_one_word_trailing_spaces(self):
4569        self._test_encode('hello   ', 'hello  =20')
4570
4571    def test_encode_one_line_trailing_spaces(self):
4572        self._test_encode('hello   \n', 'hello  =20\n')
4573
4574    def test_encode_one_word_trailing_tab(self):
4575        self._test_encode('hello  \t', 'hello  =09')
4576
4577    def test_encode_one_line_trailing_tab(self):
4578        self._test_encode('hello  \t\n', 'hello  =09\n')
4579
4580    def test_encode_trailing_space_before_maxlinelen(self):
4581        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4582
4583    def test_encode_trailing_space_at_maxlinelen(self):
4584        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4585
4586    def test_encode_trailing_space_beyond_maxlinelen(self):
4587        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4588
4589    def test_encode_whitespace_lines(self):
4590        self._test_encode(' \n' * 5, '=20\n' * 5)
4591
4592    def test_encode_quoted_equals(self):
4593        self._test_encode('a = b', 'a =3D b')
4594
4595    def test_encode_one_long_string(self):
4596        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4597
4598    def test_encode_one_long_line(self):
4599        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4600
4601    def test_encode_one_very_long_line(self):
4602        self._test_encode('x' * 200 + '\n',
4603                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4604
4605    def test_encode_shortest_maxlinelen(self):
4606        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
4607
4608    def test_encode_maxlinelen_too_small(self):
4609        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4610
4611    def test_encode(self):
4612        eq = self.assertEqual
4613        eq(quoprimime.body_encode(''), '')
4614        eq(quoprimime.body_encode('hello'), 'hello')
4615        # Test the binary flag
4616        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
4617        # Test the maxlinelen arg
4618        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
4619xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4620 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4621x xxxx xxxx xxxx xxxx=20""")
4622        # Test the eol argument
4623        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4624           """\
4625xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4626 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4627x xxxx xxxx xxxx xxxx=20""")
4628        eq(quoprimime.body_encode("""\
4629one line
4630
4631two line"""), """\
4632one line
4633
4634two line""")
4635
4636
4637
4638# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode() registers a throwaway 'fake' charset; drop it
        # again (when present) so that it does not leak into other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        # Try a charset with QP body encoding
        qp_charset = Charset('iso-8859-1')
        self.assertEqual('hello w=F6rld',
                         qp_charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        b64_charset = Charset('utf-8')
        self.assertEqual('aGVsbG8gd29ybGQ=\n',
                         b64_charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain_charset = Charset('us-ascii')
        self.assertEqual('hello world',
                         plain_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        self.assertEqual('hello world',
                         fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        # A charset name containing a non-ASCII character is rejected.
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4693
4694
4695
4696# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        hdr = Header('Hello World!')
        self.ndiffAssertEqual(hdr.encode(), 'Hello World!')
        # The leading blank of the appended chunk is kept, so two blanks
        # end up between the chunks.
        hdr.append(' Goodbye World!')
        self.ndiffAssertEqual(hdr.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        hdr = Header('Hello World!')
        self.ndiffAssertEqual(hdr.encode(), 'Hello World!')
        # Even without a leading blank the chunks are separated by one.
        hdr.append('Goodbye World!')
        self.ndiffAssertEqual(hdr.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        plain = 'no decoding needed'
        self.assertEqual(decode_header(plain), [(plain, None)])

    def test_long(self):
        hdr = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                     maxlinelen=76)
        folded = hdr.encode(splitchars=' ')
        # Every piece between two folding points has to respect maxlinelen.
        for piece in folded.split('\n '):
            self.assertLessEqual(len(piece), 76)

    def test_multilingual(self):
        check = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # Build one header out of three chunks in three different charsets.
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        check(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding gives back the three chunks together with their charsets.
        decoded = decode_header(enc)
        check(len(decoded), 3)
        check(decoded[0], (g_head, 'iso-8859-1'))
        check(decoded[1], (cz_head, 'iso-8859-2'))
        check(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        check(ustr,
              (b'Die Mieter treten hier ein werden mit einem Foerderband '
               b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
               b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
               b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
               b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
               b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
               b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
               b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
               b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
               b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
               b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
               b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
               b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
               b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
               b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
               b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
               ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        check(newh, h)

    def test_empty_header_encode(self):
        self.assertEqual(Header().encode(), '')

    def test_header_ctor_default_args(self):
        hdr = Header()
        self.ndiffAssertEqual(hdr, '')
        hdr.append('foo', Charset('iso-8859-1'))
        self.ndiffAssertEqual(hdr, 'foo')

    def test_explicit_maxlinelen(self):
        check = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Without a header name the whole default line length is available.
        hdr = Header(hstr)
        check(hdr.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        check(str(hdr), hstr)
        # With a header name the first line is folded earlier.
        hdr = Header(hstr, header_name='Subject')
        check(hdr.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        check(str(hdr), hstr)
        # A sufficiently large maxlinelen prevents folding altogether.
        hdr = Header(hstr, maxlinelen=1024, header_name='Subject')
        check(hdr.encode(), hstr)
        check(str(hdr), hstr)

    def test_quopri_splittable(self):
        check = self.ndiffAssertEqual
        text = 'xxxx ' * 20
        hdr = Header(charset='iso-8859-1', maxlinelen=20)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # The folded header has to decode back to the original text.
        check(text, str(make_header(decode_header(encoded))))
        hdr = Header(charset='iso-8859-1', maxlinelen=40)
        hdr.append('xxxx ' * 20)
        encoded = hdr.encode()
        check(encoded, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_base64_splittable(self):
        check = self.ndiffAssertEqual
        text = 'xxxx ' * 20
        hdr = Header(charset='koi8-r', maxlinelen=20)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        # The folded header has to decode back to the original text.
        check(text, str(make_header(decode_header(encoded))))
        hdr = Header(charset='koi8-r', maxlinelen=40)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_us_ascii_header(self):
        text = 'hello'
        chunks = decode_header(text)
        self.assertEqual(chunks, [('hello', None)])
        rebuilt = make_header(chunks)
        self.assertEqual(text, rebuilt.encode())

    def test_string_charset(self):
        # The charset may be given by name instead of as a Charset instance.
        hdr = Header()
        hdr.append('hello', 'iso-8859-1')
        self.assertEqual(hdr, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # Depending on the text either the q or the b encoding is chosen.
        hdr = Header('p\xf6stal', 'utf-8')
        self.assertEqual(hdr.encode(), '=?utf-8?q?p=C3=B6stal?=')
        hdr = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        self.assertEqual(hdr.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # By default bytes that cannot be decoded are an error ...
        with self.assertRaises(UnicodeError):
            Header(raw)
        hdr = Header()
        with self.assertRaises(UnicodeError):
            hdr.append(raw)
        # ... but an error handler can be requested explicitly.
        replaced = raw.decode('utf-8', 'replace')
        self.assertEqual(str(Header(raw, errors='replace')), replaced)
        hdr.append(raw, errors='replace')
        self.assertEqual(str(hdr), replaced)

    def test_escaped_8bit_header(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        escaped = raw.decode('ascii', 'surrogateescape')
        hdr = Header(escaped, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(hdr),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(hdr),
                         [(raw, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        hdr = Header(raw, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(hdr),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(hdr),
                         [(raw, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        original = Header(raw, charset=email.charset.UNKNOWN8BIT)
        # Round trip the header through decode_header() and make_header().
        rebuilt = email.header.make_header(email.header.decode_header(original))
        self.assertEqual(
            str(rebuilt),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(rebuilt),
                         [(raw, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        hdr = Header('test')
        # Changing the list returned by decode_header() must leave the
        # header itself untouched.
        returned = email.header.decode_header(hdr)
        returned.append(('ascii', 'test2'))
        self.assertEqual(str(hdr), 'test')

    def test_encoded_adjacent_nonencoded(self):
        hdr = Header()
        hdr.append('hello', 'iso-8859-1')
        hdr.append('world')
        encoded = hdr.encode()
        self.assertEqual(encoded, '=?iso-8859-1?q?hello?= world')
        # Decoding and rebuilding the header gives the same encoded form.
        rebuilt = make_header(decode_header(encoded))
        self.assertEqual(rebuilt.encode(), encoded)

    def test_whitespace_keeper(self):
        raw = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(raw)
        # The two adjacent koi8-r encoded words come back as a single chunk.
        self.assertEqual(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        hdr = make_header(parts)
        self.assertEqual(
            hdr.encode(),
            'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        broken = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        with self.assertRaises(errors.HeaderParseError):
            decode_header(broken)

    def test_shift_jis_charset(self):
        # The expected encoded word is labelled iso-2022-jp, not shift_jis.
        hdr = Header('文', charset='shift_jis')
        self.assertEqual(hdr.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        parsed = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(parsed), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        message = Message()
        message['SomeHeader'] = '   value with leading ws'
        self.assertEqual(str(message),
                         "SomeHeader:    value with leading ws\n\n")

    def test_whitespace_header(self):
        # A header consisting of a single blank is encoded as that blank.
        blank_header = Header(' ')
        self.assertEqual(blank_header.encode(), ' ')
5051
5052
5053
5054# Test RFC 2231 header parameters (en/de)coding
5055class TestRFC2231(TestEmailBase):
5056
5057    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5058    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5059    def test_get_param(self):
5060        eq = self.assertEqual
5061        msg = self._msgobj('msg_29.txt')
5062        eq(msg.get_param('title'),
5063           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5064        eq(msg.get_param('title', unquote=False),
5065           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
5066
5067    def test_set_param(self):
5068        eq = self.ndiffAssertEqual
5069        msg = Message()
5070        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5071                      charset='us-ascii')
5072        eq(msg.get_param('title'),
5073           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
5074        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5075                      charset='us-ascii', language='en')
5076        eq(msg.get_param('title'),
5077           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5078        msg = self._msgobj('msg_01.txt')
5079        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5080                      charset='us-ascii', language='en')
5081        eq(msg.as_string(maxheaderlen=78), """\
5082Return-Path: <bbb@zzz.org>
5083Delivered-To: bbb@zzz.org
5084Received: by mail.zzz.org (Postfix, from userid 889)
5085\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5086MIME-Version: 1.0
5087Content-Transfer-Encoding: 7bit
5088Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5089From: bbb@ddd.com (John X. Doe)
5090To: bbb@zzz.org
5091Subject: This is a test message
5092Date: Fri, 4 May 2001 14:05:44 -0400
5093Content-Type: text/plain; charset=us-ascii;
5094 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5095
5096
5097Hi,
5098
5099Do you like this message?
5100
5101-Me
5102""")
5103
5104    def test_set_param_requote(self):
5105        msg = Message()
5106        msg.set_param('title', 'foo')
5107        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
5108        msg.set_param('title', 'bar', requote=False)
5109        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
5110        # tspecial is still quoted.
5111        msg.set_param('title', "(bar)bell", requote=False)
5112        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
5113
5114    def test_del_param(self):
5115        eq = self.ndiffAssertEqual
5116        msg = self._msgobj('msg_01.txt')
5117        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
5118        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5119            charset='us-ascii', language='en')
5120        msg.del_param('foo', header='Content-Type')
5121        eq(msg.as_string(maxheaderlen=78), """\
5122Return-Path: <bbb@zzz.org>
5123Delivered-To: bbb@zzz.org
5124Received: by mail.zzz.org (Postfix, from userid 889)
5125\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5126MIME-Version: 1.0
5127Content-Transfer-Encoding: 7bit
5128Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5129From: bbb@ddd.com (John X. Doe)
5130To: bbb@zzz.org
5131Subject: This is a test message
5132Date: Fri, 4 May 2001 14:05:44 -0400
5133Content-Type: text/plain; charset="us-ascii";
5134 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5135
5136
5137Hi,
5138
5139Do you like this message?
5140
5141-Me
5142""")
5143
5144    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
5145    # I changed the charset name, though, because the one in the file isn't
5146    # a legal charset name.  Should add a test for an illegal charset.
5147    def test_rfc2231_get_content_charset(self):
5148        eq = self.assertEqual
5149        msg = self._msgobj('msg_32.txt')
5150        eq(msg.get_content_charset(), 'us-ascii')
5151
5152    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
5153    def test_rfc2231_parse_rfc_quoting(self):
5154        m = textwrap.dedent('''\
5155            Content-Disposition: inline;
5156            \tfilename*0*=''This%20is%20even%20more%20;
5157            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
5158            \tfilename*2="is it not.pdf"
5159
5160            ''')
5161        msg = email.message_from_string(m)
5162        self.assertEqual(msg.get_filename(),
5163                         'This is even more ***fun*** is it not.pdf')
5164        self.assertEqual(m, msg.as_string())
5165
5166    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5167    def test_rfc2231_parse_extra_quoting(self):
5168        m = textwrap.dedent('''\
5169            Content-Disposition: inline;
5170            \tfilename*0*="''This%20is%20even%20more%20";
5171            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5172            \tfilename*2="is it not.pdf"
5173
5174            ''')
5175        msg = email.message_from_string(m)
5176        self.assertEqual(msg.get_filename(),
5177                         'This is even more ***fun*** is it not.pdf')
5178        self.assertEqual(m, msg.as_string())
5179
5180    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
5181    # but new test uses *0* because otherwise lang/charset is not valid.
5182    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
5183    def test_rfc2231_no_language_or_charset(self):
5184        m = '''\
5185Content-Transfer-Encoding: 8bit
5186Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
5187Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
5188
5189'''
5190        msg = email.message_from_string(m)
5191        param = msg.get_param('NAME')
5192        self.assertNotIsInstance(param, tuple)
5193        self.assertEqual(
5194            param,
5195            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
5196
5197    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
5198    def test_rfc2231_no_language_or_charset_in_filename(self):
5199        m = '''\
5200Content-Disposition: inline;
5201\tfilename*0*="''This%20is%20even%20more%20";
5202\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5203\tfilename*2="is it not.pdf"
5204
5205'''
5206        msg = email.message_from_string(m)
5207        self.assertEqual(msg.get_filename(),
5208                         'This is even more ***fun*** is it not.pdf')
5209
5210    # Duplicate of previous test?
5211    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
5212        m = '''\
5213Content-Disposition: inline;
5214\tfilename*0*="''This%20is%20even%20more%20";
5215\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5216\tfilename*2="is it not.pdf"
5217
5218'''
5219        msg = email.message_from_string(m)
5220        self.assertEqual(msg.get_filename(),
5221                         'This is even more ***fun*** is it not.pdf')
5222
5223    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
5224    # but the test below is wrong (the first part should be decoded).
5225    def test_rfc2231_partly_encoded(self):
5226        m = '''\
5227Content-Disposition: inline;
5228\tfilename*0="''This%20is%20even%20more%20";
5229\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5230\tfilename*2="is it not.pdf"
5231
5232'''
5233        msg = email.message_from_string(m)
5234        self.assertEqual(
5235            msg.get_filename(),
5236            'This%20is%20even%20more%20***fun*** is it not.pdf')
5237
5238    def test_rfc2231_partly_nonencoded(self):
5239        m = '''\
5240Content-Disposition: inline;
5241\tfilename*0="This%20is%20even%20more%20";
5242\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
5243\tfilename*2="is it not.pdf"
5244
5245'''
5246        msg = email.message_from_string(m)
5247        self.assertEqual(
5248            msg.get_filename(),
5249            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
5250
5251    def test_rfc2231_no_language_or_charset_in_boundary(self):
5252        m = '''\
5253Content-Type: multipart/alternative;
5254\tboundary*0*="''This%20is%20even%20more%20";
5255\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
5256\tboundary*2="is it not.pdf"
5257
5258'''
5259        msg = email.message_from_string(m)
5260        self.assertEqual(msg.get_boundary(),
5261                         'This is even more ***fun*** is it not.pdf')
5262
5263    def test_rfc2231_no_language_or_charset_in_charset(self):
5264        # This is a nonsensical charset value, but tests the code anyway
5265        m = '''\
5266Content-Type: text/plain;
5267\tcharset*0*="This%20is%20even%20more%20";
5268\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
5269\tcharset*2="is it not.pdf"
5270
5271'''
5272        msg = email.message_from_string(m)
5273        self.assertEqual(msg.get_content_charset(),
5274                         'this is even more ***fun*** is it not.pdf')
5275
5276    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
5277    def test_rfc2231_bad_encoding_in_filename(self):
5278        m = '''\
5279Content-Disposition: inline;
5280\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
5281\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5282\tfilename*2="is it not.pdf"
5283
5284'''
5285        msg = email.message_from_string(m)
5286        self.assertEqual(msg.get_filename(),
5287                         'This is even more ***fun*** is it not.pdf')
5288
5289    def test_rfc2231_bad_encoding_in_charset(self):
5290        m = """\
5291Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
5292
5293"""
5294        msg = email.message_from_string(m)
5295        # This should return None because non-ascii characters in the charset
5296        # are not allowed.
5297        self.assertEqual(msg.get_content_charset(), None)
5298
5299    def test_rfc2231_bad_character_in_charset(self):
5300        m = """\
5301Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
5302
5303"""
5304        msg = email.message_from_string(m)
5305        # This should return None because non-ascii characters in the charset
5306        # are not allowed.
5307        self.assertEqual(msg.get_content_charset(), None)
5308
5309    def test_rfc2231_bad_character_in_filename(self):
5310        m = '''\
5311Content-Disposition: inline;
5312\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
5313\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5314\tfilename*2*="is it not.pdf%E2"
5315
5316'''
5317        msg = email.message_from_string(m)
5318        self.assertEqual(msg.get_filename(),
5319                         'This is even more ***fun*** is it not.pdf\ufffd')
5320
5321    def test_rfc2231_unknown_encoding(self):
5322        m = """\
5323Content-Transfer-Encoding: 8bit
5324Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
5325
5326"""
5327        msg = email.message_from_string(m)
5328        self.assertEqual(msg.get_filename(), 'myfile.txt')
5329
5330    def test_rfc2231_single_tick_in_filename_extended(self):
5331        eq = self.assertEqual
5332        m = """\
5333Content-Type: application/x-foo;
5334\tname*0*=\"Frank's\"; name*1*=\" Document\"
5335
5336"""
5337        msg = email.message_from_string(m)
5338        charset, language, s = msg.get_param('name')
5339        eq(charset, None)
5340        eq(language, None)
5341        eq(s, "Frank's Document")
5342
5343    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5344    def test_rfc2231_single_tick_in_filename(self):
5345        m = """\
5346Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
5347
5348"""
5349        msg = email.message_from_string(m)
5350        param = msg.get_param('name')
5351        self.assertNotIsInstance(param, tuple)
5352        self.assertEqual(param, "Frank's Document")
5353
5354    def test_rfc2231_missing_tick(self):
5355        m = '''\
5356Content-Disposition: inline;
5357\tfilename*0*="'This%20is%20broken";
5358'''
5359        msg = email.message_from_string(m)
5360        self.assertEqual(
5361            msg.get_filename(),
5362            "'This is broken")
5363
5364    def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
5365        m = '''\
5366Content-Disposition: inline;
5367\tfilename*0*="'This%20is%E2broken";
5368'''
5369        msg = email.message_from_string(m)
5370        self.assertEqual(
5371            msg.get_filename(),
5372            "'This is\ufffdbroken")
5373
5374    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
5375    def test_rfc2231_tick_attack_extended(self):
5376        eq = self.assertEqual
5377        m = """\
5378Content-Type: application/x-foo;
5379\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
5380
5381"""
5382        msg = email.message_from_string(m)
5383        charset, language, s = msg.get_param('name')
5384        eq(charset, 'us-ascii')
5385        eq(language, 'en-us')
5386        eq(s, "Frank's Document")
5387
5388    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
5389    def test_rfc2231_tick_attack(self):
5390        m = """\
5391Content-Type: application/x-foo;
5392\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
5393
5394"""
5395        msg = email.message_from_string(m)
5396        param = msg.get_param('name')
5397        self.assertNotIsInstance(param, tuple)
5398        self.assertEqual(param, "us-ascii'en-us'Frank's Document")
5399
5400    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
5401    def test_rfc2231_no_extended_values(self):
5402        eq = self.assertEqual
5403        m = """\
5404Content-Type: application/x-foo; name=\"Frank's Document\"
5405
5406"""
5407        msg = email.message_from_string(m)
5408        eq(msg.get_param('name'), "Frank's Document")
5409
5410    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
5411    def test_rfc2231_encoded_then_unencoded_segments(self):
5412        eq = self.assertEqual
5413        m = """\
5414Content-Type: application/x-foo;
5415\tname*0*=\"us-ascii'en-us'My\";
5416\tname*1=\" Document\";
5417\tname*2*=\" For You\"
5418
5419"""
5420        msg = email.message_from_string(m)
5421        charset, language, s = msg.get_param('name')
5422        eq(charset, 'us-ascii')
5423        eq(language, 'en-us')
5424        eq(s, 'My Document For You')
5425
5426    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
5427    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
5428    def test_rfc2231_unencoded_then_encoded_segments(self):
5429        eq = self.assertEqual
5430        m = """\
5431Content-Type: application/x-foo;
5432\tname*0=\"us-ascii'en-us'My\";
5433\tname*1*=\" Document\";
5434\tname*2*=\" For You\"
5435
5436"""
5437        msg = email.message_from_string(m)
5438        charset, language, s = msg.get_param('name')
5439        eq(charset, 'us-ascii')
5440        eq(language, 'en-us')
5441        eq(s, 'My Document For You')
5442
5443    def test_should_not_hang_on_invalid_ew_messages(self):
5444        messages = ["""From: user@host.com
5445To: user@host.com
5446Bad-Header:
5447 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
5448 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
5449 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
5450
5451Hello!
5452""", """From: ����� �������� <xxx@xxx>
5453To: "xxx" <xxx@xxx>
5454Subject:   ��� ���������� ����� ����� � ��������� �� ����
5455MIME-Version: 1.0
5456Content-Type: text/plain; charset="windows-1251";
5457Content-Transfer-Encoding: 8bit
5458
5459�� ����� � ���� ������ ��� ��������
5460"""]
5461        for m in messages:
5462            with self.subTest(m=m):
5463                msg = email.message_from_string(m)
5464
5465
5466# Tests to ensure that signed parts of an email are completely preserved, as
5467# required by RFC1847 section 2.1.  Note that these are incomplete, because the
5468# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Locates the first MIME part of a message text.  Group 1 is the boundary
    # text that follows the opening '--'; group 2 is everything after that
    # line up to (not including) the newline before the next line that is
    # exactly '--' plus the same boundary.  Compiled once for the class
    # instead of on every comparison.
    _FIRST_PART_RE = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (text, message): the text read from the named sample file
        # and the message object parsed from that same text.
        with openfile(filename, encoding="utf-8") as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        # Return the content of the first MIME part of *text*.  If no part
        # can be located the test fails with a message naming *label*,
        # rather than erroring out with an AttributeError on None.
        match = self._FIRST_PART_RE.search(text)
        self.assertIsNotNone(
            match, 'no MIME part found in the %s message' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to be identical to the original.
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'regenerated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        # Round trip through Message.as_string() with default settings
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same round trip, but with maxheaderlen=60 passed to as_string()
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Round trip through an explicit Generator writing to a StringIO
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5501
5502
5503
# Run the tests with unittest's command-line runner when this file is
# executed directly as a script.
if __name__ == '__main__':
    unittest.main()
5506