• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3# email package unit tests
4
5import re
6import time
7import base64
8import unittest
9import textwrap
10import warnings
11
12from io import StringIO, BytesIO
13from itertools import chain
14from random import choice
15from threading import Thread
16from unittest.mock import patch
17
18import email
19import email.policy
20import email.utils
21
22from email.charset import Charset
23from email.generator import Generator, DecodedGenerator, BytesGenerator
24from email.header import Header, decode_header, make_header
25from email.headerregistry import HeaderRegistry
26from email.message import Message
27from email.mime.application import MIMEApplication
28from email.mime.audio import MIMEAudio
29from email.mime.base import MIMEBase
30from email.mime.image import MIMEImage
31from email.mime.message import MIMEMessage
32from email.mime.multipart import MIMEMultipart
33from email.mime.nonmultipart import MIMENonMultipart
34from email.mime.text import MIMEText
35from email.parser import Parser, HeaderParser
36from email import base64mime
37from email import encoders
38from email import errors
39from email import iterators
40from email import quoprimime
41from email import utils
42
43from test.support import threading_helper
44from test.support.os_helper import unlink
45from test.test_email import openfile, TestEmailBase
46
47# These imports are documented to work, but we are testing them using a
48# different path, so we import them here just to make sure they are importable.
49from email.parser import FeedParser, BytesFeedParser
50
51NL = '\n'
52EMPTYSTRING = ''
53SPACE = ' '
54
55
56# Test various aspects of the Message class's API
57class TestMessageAPI(TestEmailBase):
58    def test_get_all(self):
59        eq = self.assertEqual
60        msg = self._msgobj('msg_20.txt')
61        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
62        eq(msg.get_all('xx', 'n/a'), 'n/a')
63
64    def test_getset_charset(self):
65        eq = self.assertEqual
66        msg = Message()
67        eq(msg.get_charset(), None)
68        charset = Charset('iso-8859-1')
69        msg.set_charset(charset)
70        eq(msg['mime-version'], '1.0')
71        eq(msg.get_content_type(), 'text/plain')
72        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
73        eq(msg.get_param('charset'), 'iso-8859-1')
74        eq(msg['content-transfer-encoding'], 'quoted-printable')
75        eq(msg.get_charset().input_charset, 'iso-8859-1')
76        # Remove the charset
77        msg.set_charset(None)
78        eq(msg.get_charset(), None)
79        eq(msg['content-type'], 'text/plain')
80        # Try adding a charset when there's already MIME headers present
81        msg = Message()
82        msg['MIME-Version'] = '2.0'
83        msg['Content-Type'] = 'text/x-weird'
84        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
85        msg.set_charset(charset)
86        eq(msg['mime-version'], '2.0')
87        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
88        eq(msg['content-transfer-encoding'], 'quinted-puntable')
89
90    def test_set_charset_from_string(self):
91        eq = self.assertEqual
92        msg = Message()
93        msg.set_charset('us-ascii')
94        eq(msg.get_charset().input_charset, 'us-ascii')
95        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
96
97    def test_set_payload_with_charset(self):
98        msg = Message()
99        charset = Charset('iso-8859-1')
100        msg.set_payload('This is a string payload', charset)
101        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
102
103    def test_set_payload_with_8bit_data_and_charset(self):
104        data = b'\xd0\x90\xd0\x91\xd0\x92'
105        charset = Charset('utf-8')
106        msg = Message()
107        msg.set_payload(data, charset)
108        self.assertEqual(msg['content-transfer-encoding'], 'base64')
109        self.assertEqual(msg.get_payload(decode=True), data)
110        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
111
112    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
113        data = b'\xd0\x90\xd0\x91\xd0\x92'
114        charset = Charset('utf-8')
115        charset.body_encoding = None # Disable base64 encoding
116        msg = Message()
117        msg.set_payload(data.decode('utf-8'), charset)
118        self.assertEqual(msg['content-transfer-encoding'], '8bit')
119        self.assertEqual(msg.get_payload(decode=True), data)
120
121    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
122        data = b'\xd0\x90\xd0\x91\xd0\x92'
123        charset = Charset('utf-8')
124        charset.body_encoding = None # Disable base64 encoding
125        msg = Message()
126        msg.set_payload(data, charset)
127        self.assertEqual(msg['content-transfer-encoding'], '8bit')
128        self.assertEqual(msg.get_payload(decode=True), data)
129
130    def test_set_payload_to_list(self):
131        msg = Message()
132        msg.set_payload([])
133        self.assertEqual(msg.get_payload(), [])
134
135    def test_attach_when_payload_is_string(self):
136        msg = Message()
137        msg['Content-Type'] = 'multipart/mixed'
138        msg.set_payload('string payload')
139        sub_msg = MIMEMessage(Message())
140        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
141                               msg.attach, sub_msg)
142
143    def test_get_charsets(self):
144        eq = self.assertEqual
145
146        msg = self._msgobj('msg_08.txt')
147        charsets = msg.get_charsets()
148        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
149
150        msg = self._msgobj('msg_09.txt')
151        charsets = msg.get_charsets('dingbat')
152        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
153                      'koi8-r'])
154
155        msg = self._msgobj('msg_12.txt')
156        charsets = msg.get_charsets()
157        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
158                      'iso-8859-3', 'us-ascii', 'koi8-r'])
159
160    def test_get_filename(self):
161        eq = self.assertEqual
162
163        msg = self._msgobj('msg_04.txt')
164        filenames = [p.get_filename() for p in msg.get_payload()]
165        eq(filenames, ['msg.txt', 'msg.txt'])
166
167        msg = self._msgobj('msg_07.txt')
168        subpart = msg.get_payload(1)
169        eq(subpart.get_filename(), 'dingusfish.gif')
170
171    def test_get_filename_with_name_parameter(self):
172        eq = self.assertEqual
173
174        msg = self._msgobj('msg_44.txt')
175        filenames = [p.get_filename() for p in msg.get_payload()]
176        eq(filenames, ['msg.txt', 'msg.txt'])
177
178    def test_get_boundary(self):
179        eq = self.assertEqual
180        msg = self._msgobj('msg_07.txt')
181        # No quotes!
182        eq(msg.get_boundary(), 'BOUNDARY')
183
184    def test_set_boundary(self):
185        eq = self.assertEqual
186        # This one has no existing boundary parameter, but the Content-Type:
187        # header appears fifth.
188        msg = self._msgobj('msg_01.txt')
189        msg.set_boundary('BOUNDARY')
190        header, value = msg.items()[4]
191        eq(header.lower(), 'content-type')
192        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
193        # This one has a Content-Type: header, with a boundary, stuck in the
194        # middle of its headers.  Make sure the order is preserved; it should
195        # be fifth.
196        msg = self._msgobj('msg_04.txt')
197        msg.set_boundary('BOUNDARY')
198        header, value = msg.items()[4]
199        eq(header.lower(), 'content-type')
200        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
201        # And this one has no Content-Type: header at all.
202        msg = self._msgobj('msg_03.txt')
203        self.assertRaises(errors.HeaderParseError,
204                          msg.set_boundary, 'BOUNDARY')
205
206    def test_make_boundary(self):
207        msg = MIMEMultipart('form-data')
208        # Note that when the boundary gets created is an implementation
209        # detail and might change.
210        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
211        # Trigger creation of boundary
212        msg.as_string()
213        self.assertEqual(msg.items()[0][1][:33],
214                        'multipart/form-data; boundary="==')
215        # XXX: there ought to be tests of the uniqueness of the boundary, too.
216
217    def test_message_rfc822_only(self):
218        # Issue 7970: message/rfc822 not in multipart parsed by
219        # HeaderParser caused an exception when flattened.
220        with openfile('msg_46.txt', encoding="utf-8") as fp:
221            msgdata = fp.read()
222        parser = HeaderParser()
223        msg = parser.parsestr(msgdata)
224        out = StringIO()
225        gen = Generator(out, True, 0)
226        gen.flatten(msg, False)
227        self.assertEqual(out.getvalue(), msgdata)
228
229    def test_byte_message_rfc822_only(self):
230        # Make sure new bytes header parser also passes this.
231        with openfile('msg_46.txt', encoding="utf-8") as fp:
232            msgdata = fp.read().encode('ascii')
233        parser = email.parser.BytesHeaderParser()
234        msg = parser.parsebytes(msgdata)
235        out = BytesIO()
236        gen = email.generator.BytesGenerator(out)
237        gen.flatten(msg)
238        self.assertEqual(out.getvalue(), msgdata)
239
240    def test_get_decoded_payload(self):
241        eq = self.assertEqual
242        msg = self._msgobj('msg_10.txt')
243        # The outer message is a multipart
244        eq(msg.get_payload(decode=True), None)
245        # Subpart 1 is 7bit encoded
246        eq(msg.get_payload(0).get_payload(decode=True),
247           b'This is a 7bit encoded message.\n')
248        # Subpart 2 is quopri
249        eq(msg.get_payload(1).get_payload(decode=True),
250           b'\xa1This is a Quoted Printable encoded message!\n')
251        # Subpart 3 is base64
252        eq(msg.get_payload(2).get_payload(decode=True),
253           b'This is a Base64 encoded message.')
254        # Subpart 4 is base64 with a trailing newline, which
255        # used to be stripped (issue 7143).
256        eq(msg.get_payload(3).get_payload(decode=True),
257           b'This is a Base64 encoded message.\n')
258        # Subpart 5 has no Content-Transfer-Encoding: header.
259        eq(msg.get_payload(4).get_payload(decode=True),
260           b'This has no Content-Transfer-Encoding: header.\n')
261
262    def test_get_decoded_uu_payload(self):
263        eq = self.assertEqual
264        msg = Message()
265        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
266        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
267            msg['content-transfer-encoding'] = cte
268            eq(msg.get_payload(decode=True), b'hello world')
269        # Now try some bogus data
270        msg.set_payload('foo')
271        eq(msg.get_payload(decode=True), b'foo')
272
273    def test_get_payload_n_raises_on_non_multipart(self):
274        msg = Message()
275        self.assertRaises(TypeError, msg.get_payload, 1)
276
277    def test_decoded_generator(self):
278        eq = self.assertEqual
279        msg = self._msgobj('msg_07.txt')
280        with openfile('msg_17.txt', encoding="utf-8") as fp:
281            text = fp.read()
282        s = StringIO()
283        g = DecodedGenerator(s)
284        g.flatten(msg)
285        eq(s.getvalue(), text)
286
287    def test__contains__(self):
288        msg = Message()
289        msg['From'] = 'Me'
290        msg['to'] = 'You'
291        # Check for case insensitivity
292        self.assertIn('from', msg)
293        self.assertIn('From', msg)
294        self.assertIn('FROM', msg)
295        self.assertIn('to', msg)
296        self.assertIn('To', msg)
297        self.assertIn('TO', msg)
298
299    def test_as_string(self):
300        msg = self._msgobj('msg_01.txt')
301        with openfile('msg_01.txt', encoding="utf-8") as fp:
302            text = fp.read()
303        self.assertEqual(text, str(msg))
304        fullrepr = msg.as_string(unixfrom=True)
305        lines = fullrepr.split('\n')
306        self.assertTrue(lines[0].startswith('From '))
307        self.assertEqual(text, NL.join(lines[1:]))
308
309    def test_as_string_policy(self):
310        msg = self._msgobj('msg_01.txt')
311        newpolicy = msg.policy.clone(linesep='\r\n')
312        fullrepr = msg.as_string(policy=newpolicy)
313        s = StringIO()
314        g = Generator(s, policy=newpolicy)
315        g.flatten(msg)
316        self.assertEqual(fullrepr, s.getvalue())
317
318    def test_nonascii_as_string_without_cte(self):
319        m = textwrap.dedent("""\
320            MIME-Version: 1.0
321            Content-type: text/plain; charset="iso-8859-1"
322
323            Test if non-ascii messages with no Content-Transfer-Encoding set
324            can be as_string'd:
325            Föö bär
326            """)
327        source = m.encode('iso-8859-1')
328        expected = textwrap.dedent("""\
329            MIME-Version: 1.0
330            Content-type: text/plain; charset="iso-8859-1"
331            Content-Transfer-Encoding: quoted-printable
332
333            Test if non-ascii messages with no Content-Transfer-Encoding set
334            can be as_string'd:
335            F=F6=F6 b=E4r
336            """)
337        msg = email.message_from_bytes(source)
338        self.assertEqual(msg.as_string(), expected)
339
340    def test_nonascii_as_string_without_content_type_and_cte(self):
341        m = textwrap.dedent("""\
342            MIME-Version: 1.0
343
344            Test if non-ascii messages with no Content-Type nor
345            Content-Transfer-Encoding set can be as_string'd:
346            Föö bär
347            """)
348        source = m.encode('iso-8859-1')
349        expected = source.decode('ascii', 'replace')
350        msg = email.message_from_bytes(source)
351        self.assertEqual(msg.as_string(), expected)
352
353    def test_as_bytes(self):
354        msg = self._msgobj('msg_01.txt')
355        with openfile('msg_01.txt', encoding="utf-8") as fp:
356            data = fp.read().encode('ascii')
357        self.assertEqual(data, bytes(msg))
358        fullrepr = msg.as_bytes(unixfrom=True)
359        lines = fullrepr.split(b'\n')
360        self.assertTrue(lines[0].startswith(b'From '))
361        self.assertEqual(data, b'\n'.join(lines[1:]))
362
363    def test_as_bytes_policy(self):
364        msg = self._msgobj('msg_01.txt')
365        newpolicy = msg.policy.clone(linesep='\r\n')
366        fullrepr = msg.as_bytes(policy=newpolicy)
367        s = BytesIO()
368        g = BytesGenerator(s,policy=newpolicy)
369        g.flatten(msg)
370        self.assertEqual(fullrepr, s.getvalue())
371
372    # test_headerregistry.TestContentTypeHeader.bad_params
373    def test_bad_param(self):
374        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
375        self.assertEqual(msg.get_param('baz'), '')
376
377    def test_missing_filename(self):
378        msg = email.message_from_string("From: foo\n")
379        self.assertEqual(msg.get_filename(), None)
380
381    def test_bogus_filename(self):
382        msg = email.message_from_string(
383        "Content-Disposition: blarg; filename\n")
384        self.assertEqual(msg.get_filename(), '')
385
386    def test_missing_boundary(self):
387        msg = email.message_from_string("From: foo\n")
388        self.assertEqual(msg.get_boundary(), None)
389
390    def test_get_params(self):
391        eq = self.assertEqual
392        msg = email.message_from_string(
393            'X-Header: foo=one; bar=two; baz=three\n')
394        eq(msg.get_params(header='x-header'),
395           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
396        msg = email.message_from_string(
397            'X-Header: foo; bar=one; baz=two\n')
398        eq(msg.get_params(header='x-header'),
399           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
400        eq(msg.get_params(), None)
401        msg = email.message_from_string(
402            'X-Header: foo; bar="one"; baz=two\n')
403        eq(msg.get_params(header='x-header'),
404           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
405
406    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
407    def test_get_param_liberal(self):
408        msg = Message()
409        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
410        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
411
412    def test_get_param(self):
413        eq = self.assertEqual
414        msg = email.message_from_string(
415            "X-Header: foo=one; bar=two; baz=three\n")
416        eq(msg.get_param('bar', header='x-header'), 'two')
417        eq(msg.get_param('quuz', header='x-header'), None)
418        eq(msg.get_param('quuz'), None)
419        msg = email.message_from_string(
420            'X-Header: foo; bar="one"; baz=two\n')
421        eq(msg.get_param('foo', header='x-header'), '')
422        eq(msg.get_param('bar', header='x-header'), 'one')
423        eq(msg.get_param('baz', header='x-header'), 'two')
424        # XXX: We are not RFC-2045 compliant!  We cannot parse:
425        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
426        # msg.get_param("weird")
427        # yet.
428
429    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
430    def test_get_param_funky_continuation_lines(self):
431        msg = self._msgobj('msg_22.txt')
432        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
433
434    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
435    def test_get_param_with_semis_in_quotes(self):
436        msg = email.message_from_string(
437            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
438        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
439        self.assertEqual(msg.get_param('name', unquote=False),
440                         '"Jim&amp;&amp;Jill"')
441
442    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
443    def test_get_param_with_quotes(self):
444        msg = email.message_from_string(
445            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
446        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
447        msg = email.message_from_string(
448            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
449        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
450
451    def test_field_containment(self):
452        msg = email.message_from_string('Header: exists')
453        self.assertIn('header', msg)
454        self.assertIn('Header', msg)
455        self.assertIn('HEADER', msg)
456        self.assertNotIn('headerx', msg)
457
458    def test_set_param(self):
459        eq = self.assertEqual
460        msg = Message()
461        msg.set_param('charset', 'iso-2022-jp')
462        eq(msg.get_param('charset'), 'iso-2022-jp')
463        msg.set_param('importance', 'high value')
464        eq(msg.get_param('importance'), 'high value')
465        eq(msg.get_param('importance', unquote=False), '"high value"')
466        eq(msg.get_params(), [('text/plain', ''),
467                              ('charset', 'iso-2022-jp'),
468                              ('importance', 'high value')])
469        eq(msg.get_params(unquote=False), [('text/plain', ''),
470                                       ('charset', '"iso-2022-jp"'),
471                                       ('importance', '"high value"')])
472        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
473        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
474
475    def test_del_param(self):
476        eq = self.assertEqual
477        msg = self._msgobj('msg_05.txt')
478        eq(msg.get_params(),
479           [('multipart/report', ''), ('report-type', 'delivery-status'),
480            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
481        old_val = msg.get_param("report-type")
482        msg.del_param("report-type")
483        eq(msg.get_params(),
484           [('multipart/report', ''),
485            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
486        msg.set_param("report-type", old_val)
487        eq(msg.get_params(),
488           [('multipart/report', ''),
489            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
490            ('report-type', old_val)])
491
492    def test_del_param_on_other_header(self):
493        msg = Message()
494        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
495        msg.del_param('filename', 'content-disposition')
496        self.assertEqual(msg['content-disposition'], 'attachment')
497
498    def test_del_param_on_nonexistent_header(self):
499        msg = Message()
500        # Deleting param on empty msg should not raise exception.
501        msg.del_param('filename', 'content-disposition')
502
503    def test_del_nonexistent_param(self):
504        msg = Message()
505        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
506        existing_header = msg['Content-Type']
507        msg.del_param('foobar', header='Content-Type')
508        self.assertEqual(msg['Content-Type'], existing_header)
509
510    def test_set_type(self):
511        eq = self.assertEqual
512        msg = Message()
513        self.assertRaises(ValueError, msg.set_type, 'text')
514        msg.set_type('text/plain')
515        eq(msg['content-type'], 'text/plain')
516        msg.set_param('charset', 'us-ascii')
517        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
518        msg.set_type('text/html')
519        eq(msg['content-type'], 'text/html; charset="us-ascii"')
520
521    def test_set_type_on_other_header(self):
522        msg = Message()
523        msg['X-Content-Type'] = 'text/plain'
524        msg.set_type('application/octet-stream', 'X-Content-Type')
525        self.assertEqual(msg['x-content-type'], 'application/octet-stream')
526
527    def test_get_content_type_missing(self):
528        msg = Message()
529        self.assertEqual(msg.get_content_type(), 'text/plain')
530
531    def test_get_content_type_missing_with_default_type(self):
532        msg = Message()
533        msg.set_default_type('message/rfc822')
534        self.assertEqual(msg.get_content_type(), 'message/rfc822')
535
536    def test_get_content_type_from_message_implicit(self):
537        msg = self._msgobj('msg_30.txt')
538        self.assertEqual(msg.get_payload(0).get_content_type(),
539                         'message/rfc822')
540
541    def test_get_content_type_from_message_explicit(self):
542        msg = self._msgobj('msg_28.txt')
543        self.assertEqual(msg.get_payload(0).get_content_type(),
544                         'message/rfc822')
545
546    def test_get_content_type_from_message_text_plain_implicit(self):
547        msg = self._msgobj('msg_03.txt')
548        self.assertEqual(msg.get_content_type(), 'text/plain')
549
550    def test_get_content_type_from_message_text_plain_explicit(self):
551        msg = self._msgobj('msg_01.txt')
552        self.assertEqual(msg.get_content_type(), 'text/plain')
553
554    def test_get_content_maintype_missing(self):
555        msg = Message()
556        self.assertEqual(msg.get_content_maintype(), 'text')
557
558    def test_get_content_maintype_missing_with_default_type(self):
559        msg = Message()
560        msg.set_default_type('message/rfc822')
561        self.assertEqual(msg.get_content_maintype(), 'message')
562
563    def test_get_content_maintype_from_message_implicit(self):
564        msg = self._msgobj('msg_30.txt')
565        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
566
567    def test_get_content_maintype_from_message_explicit(self):
568        msg = self._msgobj('msg_28.txt')
569        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
570
571    def test_get_content_maintype_from_message_text_plain_implicit(self):
572        msg = self._msgobj('msg_03.txt')
573        self.assertEqual(msg.get_content_maintype(), 'text')
574
575    def test_get_content_maintype_from_message_text_plain_explicit(self):
576        msg = self._msgobj('msg_01.txt')
577        self.assertEqual(msg.get_content_maintype(), 'text')
578
579    def test_get_content_subtype_missing(self):
580        msg = Message()
581        self.assertEqual(msg.get_content_subtype(), 'plain')
582
583    def test_get_content_subtype_missing_with_default_type(self):
584        msg = Message()
585        msg.set_default_type('message/rfc822')
586        self.assertEqual(msg.get_content_subtype(), 'rfc822')
587
588    def test_get_content_subtype_from_message_implicit(self):
589        msg = self._msgobj('msg_30.txt')
590        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
591
592    def test_get_content_subtype_from_message_explicit(self):
593        msg = self._msgobj('msg_28.txt')
594        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
595
596    def test_get_content_subtype_from_message_text_plain_implicit(self):
597        msg = self._msgobj('msg_03.txt')
598        self.assertEqual(msg.get_content_subtype(), 'plain')
599
600    def test_get_content_subtype_from_message_text_plain_explicit(self):
601        msg = self._msgobj('msg_01.txt')
602        self.assertEqual(msg.get_content_subtype(), 'plain')
603
604    def test_get_content_maintype_error(self):
605        msg = Message()
606        msg['Content-Type'] = 'no-slash-in-this-string'
607        self.assertEqual(msg.get_content_maintype(), 'text')
608
609    def test_get_content_subtype_error(self):
610        msg = Message()
611        msg['Content-Type'] = 'no-slash-in-this-string'
612        self.assertEqual(msg.get_content_subtype(), 'plain')
613
614    def test_replace_header(self):
615        eq = self.assertEqual
616        msg = Message()
617        msg.add_header('First', 'One')
618        msg.add_header('Second', 'Two')
619        msg.add_header('Third', 'Three')
620        eq(msg.keys(), ['First', 'Second', 'Third'])
621        eq(msg.values(), ['One', 'Two', 'Three'])
622        msg.replace_header('Second', 'Twenty')
623        eq(msg.keys(), ['First', 'Second', 'Third'])
624        eq(msg.values(), ['One', 'Twenty', 'Three'])
625        msg.add_header('First', 'Eleven')
626        msg.replace_header('First', 'One Hundred')
627        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
628        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
629        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
630
631    def test_get_content_disposition(self):
632        msg = Message()
633        self.assertIsNone(msg.get_content_disposition())
634        msg.add_header('Content-Disposition', 'attachment',
635                       filename='random.avi')
636        self.assertEqual(msg.get_content_disposition(), 'attachment')
637        msg.replace_header('Content-Disposition', 'inline')
638        self.assertEqual(msg.get_content_disposition(), 'inline')
639        msg.replace_header('Content-Disposition', 'InlinE')
640        self.assertEqual(msg.get_content_disposition(), 'inline')
641
642    # test_defect_handling:test_invalid_chars_in_base64_payload
643    def test_broken_base64_payload(self):
644        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
645        msg = Message()
646        msg['content-type'] = 'audio/x-midi'
647        msg['content-transfer-encoding'] = 'base64'
648        msg.set_payload(x)
649        self.assertEqual(msg.get_payload(decode=True),
650                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
651                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
652        self.assertIsInstance(msg.defects[0],
653                              errors.InvalidBase64CharactersDefect)
654
655    def test_broken_unicode_payload(self):
656        # This test improves coverage but is not a compliance test.
657        # The behavior in this situation is currently undefined by the API.
658        x = 'this is a br\xf6ken thing to do'
659        msg = Message()
660        msg['content-type'] = 'text/plain'
661        msg['content-transfer-encoding'] = '8bit'
662        msg.set_payload(x)
663        self.assertEqual(msg.get_payload(decode=True),
664                         bytes(x, 'raw-unicode-escape'))
665
666    def test_questionable_bytes_payload(self):
667        # This test improves coverage but is not a compliance test,
668        # since it involves poking inside the black box.
669        x = 'this is a quéstionable thing to do'.encode('utf-8')
670        msg = Message()
671        msg['content-type'] = 'text/plain; charset="utf-8"'
672        msg['content-transfer-encoding'] = '8bit'
673        msg._payload = x
674        self.assertEqual(msg.get_payload(decode=True), x)
675
676    # Issue 1078919
677    def test_ascii_add_header(self):
678        msg = Message()
679        msg.add_header('Content-Disposition', 'attachment',
680                       filename='bud.gif')
681        self.assertEqual('attachment; filename="bud.gif"',
682            msg['Content-Disposition'])
683
684    def test_noascii_add_header(self):
685        msg = Message()
686        msg.add_header('Content-Disposition', 'attachment',
687            filename="Fußballer.ppt")
688        self.assertEqual(
689            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
690            msg['Content-Disposition'])
691
692    def test_nonascii_add_header_via_triple(self):
693        msg = Message()
694        msg.add_header('Content-Disposition', 'attachment',
695            filename=('iso-8859-1', '', 'Fußballer.ppt'))
696        self.assertEqual(
697            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
698            msg['Content-Disposition'])
699
700    def test_ascii_add_header_with_tspecial(self):
701        msg = Message()
702        msg.add_header('Content-Disposition', 'attachment',
703            filename="windows [filename].ppt")
704        self.assertEqual(
705            'attachment; filename="windows [filename].ppt"',
706            msg['Content-Disposition'])
707
708    def test_nonascii_add_header_with_tspecial(self):
709        msg = Message()
710        msg.add_header('Content-Disposition', 'attachment',
711            filename="Fußballer [filename].ppt")
712        self.assertEqual(
713            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
714            msg['Content-Disposition'])
715
716    def test_binary_quopri_payload(self):
717        for charset in ('latin-1', 'ascii'):
718            msg = Message()
719            msg['content-type'] = 'text/plain; charset=%s' % charset
720            msg['content-transfer-encoding'] = 'quoted-printable'
721            msg.set_payload(b'foo=e6=96=87bar')
722            self.assertEqual(
723                msg.get_payload(decode=True),
724                b'foo\xe6\x96\x87bar',
725                'get_payload returns wrong result with charset %s.' % charset)
726
727    def test_binary_base64_payload(self):
728        for charset in ('latin-1', 'ascii'):
729            msg = Message()
730            msg['content-type'] = 'text/plain; charset=%s' % charset
731            msg['content-transfer-encoding'] = 'base64'
732            msg.set_payload(b'Zm9v5paHYmFy')
733            self.assertEqual(
734                msg.get_payload(decode=True),
735                b'foo\xe6\x96\x87bar',
736                'get_payload returns wrong result with charset %s.' % charset)
737
738    def test_binary_uuencode_payload(self):
739        for charset in ('latin-1', 'ascii'):
740            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
741                msg = Message()
742                msg['content-type'] = 'text/plain; charset=%s' % charset
743                msg['content-transfer-encoding'] = encoding
744                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
745                self.assertEqual(
746                    msg.get_payload(decode=True),
747                    b'foo\xe6\x96\x87bar',
748                    str(('get_payload returns wrong result ',
749                         'with charset {0} and encoding {1}.')).\
750                        format(charset, encoding))
751
752    def test_add_header_with_name_only_param(self):
753        msg = Message()
754        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
755        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
756
757    def test_add_header_with_no_value(self):
758        msg = Message()
759        msg.add_header('X-Status', None)
760        self.assertEqual('', msg['X-Status'])
761
762    # Issue 5871: reject an attempt to embed a header inside a header value
763    # (header injection attack).
764    def test_embedded_header_via_Header_rejected(self):
765        msg = Message()
766        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
767        self.assertRaises(errors.HeaderParseError, msg.as_string)
768
769    def test_embedded_header_via_string_rejected(self):
770        msg = Message()
771        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
772        self.assertRaises(errors.HeaderParseError, msg.as_string)
773
774    def test_unicode_header_defaults_to_utf8_encoding(self):
775        # Issue 14291
776        m = MIMEText('abc\n')
777        m['Subject'] = 'É test'
778        self.assertEqual(str(m),textwrap.dedent("""\
779            Content-Type: text/plain; charset="us-ascii"
780            MIME-Version: 1.0
781            Content-Transfer-Encoding: 7bit
782            Subject: =?utf-8?q?=C3=89_test?=
783
784            abc
785            """))
786
787    def test_unicode_body_defaults_to_utf8_encoding(self):
788        # Issue 14291
789        m = MIMEText('É testabc\n')
790        self.assertEqual(str(m),textwrap.dedent("""\
791            Content-Type: text/plain; charset="utf-8"
792            MIME-Version: 1.0
793            Content-Transfer-Encoding: base64
794
795            w4kgdGVzdGFiYwo=
796            """))
797
798
799# Test the email.encoders module
800class TestEncoders(unittest.TestCase):
801
802    def test_EncodersEncode_base64(self):
803        with openfile('python.gif', 'rb') as fp:
804            bindata = fp.read()
805        mimed = email.mime.image.MIMEImage(bindata)
806        base64ed = mimed.get_payload()
807        # the transfer-encoded body lines should all be <=76 characters
808        lines = base64ed.split('\n')
809        self.assertLessEqual(max([ len(x) for x in lines ]), 76)
810
811    def test_encode_empty_payload(self):
812        eq = self.assertEqual
813        msg = Message()
814        msg.set_charset('us-ascii')
815        eq(msg['content-transfer-encoding'], '7bit')
816
817    def test_default_cte(self):
818        eq = self.assertEqual
819        # 7bit data and the default us-ascii _charset
820        msg = MIMEText('hello world')
821        eq(msg['content-transfer-encoding'], '7bit')
822        # Similar, but with 8bit data
823        msg = MIMEText('hello \xf8 world')
824        eq(msg['content-transfer-encoding'], 'base64')
825        # And now with a different charset
826        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
827        eq(msg['content-transfer-encoding'], 'quoted-printable')
828
829    def test_encode7or8bit(self):
830        # Make sure a charset whose input character set is 8bit but
831        # whose output character set is 7bit gets a transfer-encoding
832        # of 7bit.
833        eq = self.assertEqual
834        msg = MIMEText('文\n', _charset='euc-jp')
835        eq(msg['content-transfer-encoding'], '7bit')
836        eq(msg.as_string(), textwrap.dedent("""\
837            MIME-Version: 1.0
838            Content-Type: text/plain; charset="iso-2022-jp"
839            Content-Transfer-Encoding: 7bit
840
841            \x1b$BJ8\x1b(B
842            """))
843
844    def test_qp_encode_latin1(self):
845        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
846        self.assertEqual(str(msg), textwrap.dedent("""\
847            MIME-Version: 1.0
848            Content-Type: text/text; charset="iso-8859-1"
849            Content-Transfer-Encoding: quoted-printable
850
851            =E1=F6
852            """))
853
854    def test_qp_encode_non_latin1(self):
855        # Issue 16948
856        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
857        self.assertEqual(str(msg), textwrap.dedent("""\
858            MIME-Version: 1.0
859            Content-Type: text/text; charset="iso-8859-2"
860            Content-Transfer-Encoding: quoted-printable
861
862            =BF
863            """))
864
865
866# Test long header wrapping
867class TestLongHeaders(TestEmailBase):
868
869    maxDiff = None
870
871    def test_split_long_continuation(self):
872        eq = self.ndiffAssertEqual
873        msg = email.message_from_string("""\
874Subject: bug demonstration
875\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
876\tmore text
877
878test
879""")
880        sfp = StringIO()
881        g = Generator(sfp)
882        g.flatten(msg)
883        eq(sfp.getvalue(), """\
884Subject: bug demonstration
885\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
886\tmore text
887
888test
889""")
890
891    def test_another_long_almost_unsplittable_header(self):
892        eq = self.ndiffAssertEqual
893        hstr = """\
894bug demonstration
895\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
896\tmore text"""
897        h = Header(hstr, continuation_ws='\t')
898        eq(h.encode(), """\
899bug demonstration
900\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
901\tmore text""")
902        h = Header(hstr.replace('\t', ' '))
903        eq(h.encode(), """\
904bug demonstration
905 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
906 more text""")
907
908    def test_long_nonstring(self):
909        eq = self.ndiffAssertEqual
910        g = Charset("iso-8859-1")
911        cz = Charset("iso-8859-2")
912        utf8 = Charset("utf-8")
913        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
914                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
915                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
916                  b'bef\xf6rdert. ')
917        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
918                   b'd\xf9vtipu.. ')
919        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
920                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
921                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
922                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
923                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
924                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
925                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
926                     '\u3044\u307e\u3059\u3002')
927        h = Header(g_head, g, header_name='Subject')
928        h.append(cz_head, cz)
929        h.append(utf8_head, utf8)
930        msg = Message()
931        msg['Subject'] = h
932        sfp = StringIO()
933        g = Generator(sfp)
934        g.flatten(msg)
935        eq(sfp.getvalue(), """\
936Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
937 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
938 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
939 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
940 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
941 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
942 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
943 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
944 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
945 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
946 =?utf-8?b?44CC?=
947
948""")
949        eq(h.encode(maxlinelen=76), """\
950=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
951 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
952 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
953 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
954 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
955 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
956 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
957 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
958 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
959 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
960 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
961
962    def test_long_header_encode(self):
963        eq = self.ndiffAssertEqual
964        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
965                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
966                   header_name='X-Foobar-Spoink-Defrobnit')
967        eq(h.encode(), '''\
968wasnipoop; giraffes="very-long-necked-animals";
969 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
970
971    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
972        eq = self.ndiffAssertEqual
973        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
974                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
975                   header_name='X-Foobar-Spoink-Defrobnit',
976                   continuation_ws='\t')
977        eq(h.encode(), '''\
978wasnipoop; giraffes="very-long-necked-animals";
979 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
980
981    def test_long_header_encode_with_tab_continuation(self):
982        eq = self.ndiffAssertEqual
983        h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
984                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
985                   header_name='X-Foobar-Spoink-Defrobnit',
986                   continuation_ws='\t')
987        eq(h.encode(), '''\
988wasnipoop; giraffes="very-long-necked-animals";
989\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
990
991    def test_header_encode_with_different_output_charset(self):
992        h = Header('文', 'euc-jp')
993        self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
994
995    def test_long_header_encode_with_different_output_charset(self):
996        h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
997            b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
998            b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
999            b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
1000        res = """\
1001=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
1002 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
1003        self.assertEqual(h.encode(), res)
1004
1005    def test_header_splitter(self):
1006        eq = self.ndiffAssertEqual
1007        msg = MIMEText('')
1008        # It'd be great if we could use add_header() here, but that doesn't
1009        # guarantee an order of the parameters.
1010        msg['X-Foobar-Spoink-Defrobnit'] = (
1011            'wasnipoop; giraffes="very-long-necked-animals"; '
1012            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
1013        sfp = StringIO()
1014        g = Generator(sfp)
1015        g.flatten(msg)
1016        eq(sfp.getvalue(), '''\
1017Content-Type: text/plain; charset="us-ascii"
1018MIME-Version: 1.0
1019Content-Transfer-Encoding: 7bit
1020X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
1021 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
1022
1023''')
1024
1025    def test_no_semis_header_splitter(self):
1026        eq = self.ndiffAssertEqual
1027        msg = Message()
1028        msg['From'] = 'test@dom.ain'
1029        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
1030        msg.set_payload('Test')
1031        sfp = StringIO()
1032        g = Generator(sfp)
1033        g.flatten(msg)
1034        eq(sfp.getvalue(), """\
1035From: test@dom.ain
1036References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
1037 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
1038
1039Test""")
1040
1041    def test_last_split_chunk_does_not_fit(self):
1042        eq = self.ndiffAssertEqual
1043        h = Header('Subject: the first part of this is short, but_the_second'
1044            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
1045            '_all_by_itself')
1046        eq(h.encode(), """\
1047Subject: the first part of this is short,
1048 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
1049
1050    def test_splittable_leading_char_followed_by_overlong_unsplittable(self):
1051        eq = self.ndiffAssertEqual
1052        h = Header(', but_the_second'
1053            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
1054            '_all_by_itself')
1055        eq(h.encode(), """\
1056,
1057 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
1058
1059    def test_multiple_splittable_leading_char_followed_by_overlong_unsplittable(self):
1060        eq = self.ndiffAssertEqual
1061        h = Header(', , but_the_second'
1062            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
1063            '_all_by_itself')
1064        eq(h.encode(), """\
1065, ,
1066 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
1067
1068    def test_trailing_splittable_on_overlong_unsplittable(self):
1069        eq = self.ndiffAssertEqual
1070        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1071            'be_on_a_line_all_by_itself;')
1072        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
1073            "be_on_a_line_all_by_itself;")
1074
1075    def test_trailing_splittable_on_overlong_unsplittable_with_leading_splittable(self):
1076        eq = self.ndiffAssertEqual
1077        h = Header('; '
1078            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1079            'be_on_a_line_all_by_itself; ')
1080        eq(h.encode(), """\
1081;
1082 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1083
1084    def test_long_header_with_multiple_sequential_split_chars(self):
1085        eq = self.ndiffAssertEqual
1086        h = Header('This is a long line that has two whitespaces  in a row.  '
1087            'This used to cause truncation of the header when folded')
1088        eq(h.encode(), """\
1089This is a long line that has two whitespaces  in a row.  This used to cause
1090 truncation of the header when folded""")
1091
1092    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
1093        eq = self.ndiffAssertEqual
1094        h = Header('thisverylongheaderhas;semicolons;and,commas,but'
1095            'they;arenotlegal;fold,points')
1096        eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
1097                        "arenotlegal;fold,points")
1098
1099    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
1100        eq = self.ndiffAssertEqual
1101        h = Header('this is a  test where we need to have more than one line '
1102            'before; our final line that is just too big to fit;; '
1103            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1104            'be_on_a_line_all_by_itself;')
1105        eq(h.encode(), """\
1106this is a  test where we need to have more than one line before;
1107 our final line that is just too big to fit;;
1108 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
1109
1110    def test_overlong_last_part_followed_by_split_point(self):
1111        eq = self.ndiffAssertEqual
1112        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1113            'be_on_a_line_all_by_itself ')
1114        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
1115                        "should_be_on_a_line_all_by_itself ")
1116
1117    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
1118        eq = self.ndiffAssertEqual
1119        h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
1120            'before_our_final_line_; ; '
1121            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1122            'be_on_a_line_all_by_itself; ')
1123        eq(h.encode(), """\
1124this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
1125 ;
1126 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1127
1128    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
1129        eq = self.ndiffAssertEqual
1130        h = Header('this is a test where we need to have more than one line '
1131            'before our final line; ; '
1132            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1133            'be_on_a_line_all_by_itself; ')
1134        eq(h.encode(), """\
1135this is a test where we need to have more than one line before our final line;
1136 ;
1137 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1138
1139    def test_long_header_with_whitespace_runs(self):
1140        eq = self.ndiffAssertEqual
1141        msg = Message()
1142        msg['From'] = 'test@dom.ain'
1143        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
1144        msg.set_payload('Test')
1145        sfp = StringIO()
1146        g = Generator(sfp)
1147        g.flatten(msg)
1148        eq(sfp.getvalue(), """\
1149From: test@dom.ain
1150References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
1151   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
1152   <foo@dom.ain>   <foo@dom.ain>\x20\x20
1153
1154Test""")
1155
1156    def test_long_run_with_semi_header_splitter(self):
1157        eq = self.ndiffAssertEqual
1158        msg = Message()
1159        msg['From'] = 'test@dom.ain'
1160        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
1161        msg.set_payload('Test')
1162        sfp = StringIO()
1163        g = Generator(sfp)
1164        g.flatten(msg)
1165        eq(sfp.getvalue(), """\
1166From: test@dom.ain
1167References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1168 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1169 <foo@dom.ain>; abc
1170
1171Test""")
1172
1173    def test_splitter_split_on_punctuation_only_if_fws(self):
1174        eq = self.ndiffAssertEqual
1175        msg = Message()
1176        msg['From'] = 'test@dom.ain'
1177        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1178            'they;arenotlegal;fold,points')
1179        msg.set_payload('Test')
1180        sfp = StringIO()
1181        g = Generator(sfp)
1182        g.flatten(msg)
1183        # XXX the space after the header should not be there.
1184        eq(sfp.getvalue(), """\
1185From: test@dom.ain
1186References:\x20
1187 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1188
1189Test""")
1190
1191    def test_no_split_long_header(self):
1192        eq = self.ndiffAssertEqual
1193        hstr = 'References: ' + 'x' * 80
1194        h = Header(hstr)
1195        # These come on two lines because Headers are really field value
1196        # classes and don't really know about their field names.
1197        eq(h.encode(), """\
1198References:
1199 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1200        h = Header('x' * 80)
1201        eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
1202
1203    def test_splitting_multiple_long_lines(self):
1204        eq = self.ndiffAssertEqual
1205        hstr = """\
1206from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1207\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1208\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1209"""
1210        h = Header(hstr, continuation_ws='\t')
1211        eq(h.encode(), """\
1212from babylon.socal-raves.org (localhost [127.0.0.1]);
1213 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1214 for <mailman-admin@babylon.socal-raves.org>;
1215 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1216\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1217 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1218 for <mailman-admin@babylon.socal-raves.org>;
1219 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1220\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1221 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1222 for <mailman-admin@babylon.socal-raves.org>;
1223 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1224
1225    def test_splitting_first_line_only_is_long(self):
1226        eq = self.ndiffAssertEqual
1227        hstr = """\
1228from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1229\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1230\tid 17k4h5-00034i-00
1231\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1232        h = Header(hstr, maxlinelen=78, header_name='Received',
1233                   continuation_ws='\t')
1234        eq(h.encode(), """\
1235from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1236 helo=cthulhu.gerg.ca)
1237\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1238\tid 17k4h5-00034i-00
1239\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1240
1241    def test_long_8bit_header(self):
1242        eq = self.ndiffAssertEqual
1243        msg = Message()
1244        h = Header('Britische Regierung gibt', 'iso-8859-1',
1245                    header_name='Subject')
1246        h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
1247        eq(h.encode(maxlinelen=76), """\
1248=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1249 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
1250        msg['Subject'] = h
1251        eq(msg.as_string(maxheaderlen=76), """\
1252Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1253 =?iso-8859-1?q?hore-Windkraftprojekte?=
1254
1255""")
1256        eq(msg.as_string(maxheaderlen=0), """\
1257Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
1258
1259""")
1260
1261    def test_long_8bit_header_no_charset(self):
1262        eq = self.ndiffAssertEqual
1263        msg = Message()
1264        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1265                         'f\xfcr Offshore-Windkraftprojekte '
1266                         '<a-very-long-address@example.com>')
1267        msg['Reply-To'] = header_string
1268        eq(msg.as_string(maxheaderlen=78), """\
1269Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1270 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1271
1272""")
1273        msg = Message()
1274        msg['Reply-To'] = Header(header_string,
1275                                 header_name='Reply-To')
1276        eq(msg.as_string(maxheaderlen=78), """\
1277Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1278 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1279
1280""")
1281
1282    def test_long_to_header(self):
1283        eq = self.ndiffAssertEqual
1284        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
1285              '<someone@eecs.umich.edu>, '
1286              '"Someone Test #B" <someone@umich.edu>, '
1287              '"Someone Test #C" <someone@eecs.umich.edu>, '
1288              '"Someone Test #D" <someone@eecs.umich.edu>')
1289        msg = Message()
1290        msg['To'] = to
1291        eq(msg.as_string(maxheaderlen=78), '''\
1292To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
1293 "Someone Test #B" <someone@umich.edu>,
1294 "Someone Test #C" <someone@eecs.umich.edu>,
1295 "Someone Test #D" <someone@eecs.umich.edu>
1296
1297''')
1298
1299    def test_long_line_after_append(self):
1300        eq = self.ndiffAssertEqual
1301        s = 'This is an example of string which has almost the limit of header length.'
1302        h = Header(s)
1303        h.append('Add another line.')
1304        eq(h.encode(maxlinelen=76), """\
1305This is an example of string which has almost the limit of header length.
1306 Add another line.""")
1307
1308    def test_shorter_line_with_append(self):
1309        eq = self.ndiffAssertEqual
1310        s = 'This is a shorter line.'
1311        h = Header(s)
1312        h.append('Add another sentence. (Surprise?)')
1313        eq(h.encode(),
1314           'This is a shorter line. Add another sentence. (Surprise?)')
1315
1316    def test_long_field_name(self):
1317        eq = self.ndiffAssertEqual
1318        fn = 'X-Very-Very-Very-Long-Header-Name'
1319        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1320              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1321              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1322              'bef\xf6rdert. ')
1323        h = Header(gs, 'iso-8859-1', header_name=fn)
1324        # BAW: this seems broken because the first line is too long
1325        eq(h.encode(maxlinelen=76), """\
1326=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1327 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1328 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1329 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
1330
1331    def test_long_received_header(self):
1332        h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1333             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1334             'Wed, 05 Mar 2003 18:10:18 -0700')
1335        msg = Message()
1336        msg['Received-1'] = Header(h, continuation_ws='\t')
1337        msg['Received-2'] = h
1338        # This should be splitting on spaces not semicolons.
1339        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
1340Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1341 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
1342 Wed, 05 Mar 2003 18:10:18 -0700
1343Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1344 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
1345 Wed, 05 Mar 2003 18:10:18 -0700
1346
1347""")
1348
1349    def test_string_headerinst_eq(self):
1350        h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1351             'tu-muenchen.de> (David Bremner\'s message of '
1352             '"Thu, 6 Mar 2003 13:58:21 +0100")')
1353        msg = Message()
1354        msg['Received-1'] = Header(h, header_name='Received-1',
1355                                   continuation_ws='\t')
1356        msg['Received-2'] = h
1357        # XXX The space after the ':' should not be there.
1358        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
1359Received-1:\x20
1360 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1361 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1362Received-2:\x20
1363 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1364 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1365
1366""")
1367
1368    def test_long_unbreakable_lines_with_continuation(self):
1369        eq = self.ndiffAssertEqual
1370        msg = Message()
1371        t = """\
1372iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1373 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1374        msg['Face-1'] = t
1375        msg['Face-2'] = Header(t, header_name='Face-2')
1376        msg['Face-3'] = ' ' + t
1377        # XXX This splitting is all wrong.  It the first value line should be
1378        # snug against the field name or the space after the header not there.
1379        eq(msg.as_string(maxheaderlen=78), """\
1380Face-1:\x20
1381 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1382 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1383Face-2:\x20
1384 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1385 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1386Face-3:\x20
1387 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1388 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1389
1390""")
1391
1392    def test_another_long_multiline_header(self):
1393        eq = self.ndiffAssertEqual
1394        m = ('Received: from siimage.com '
1395             '([172.25.1.3]) by zima.siliconimage.com with '
1396             'Microsoft SMTPSVC(5.0.2195.4905); '
1397             'Wed, 16 Oct 2002 07:41:11 -0700')
1398        msg = email.message_from_string(m)
1399        eq(msg.as_string(maxheaderlen=78), '''\
1400Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1401 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
1402
1403''')
1404
1405    def test_long_lines_with_different_header(self):
1406        eq = self.ndiffAssertEqual
1407        h = ('List-Unsubscribe: '
1408             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1409             '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
1410             '?subject=unsubscribe>')
1411        msg = Message()
1412        msg['List'] = h
1413        msg['List'] = Header(h, header_name='List')
1414        eq(msg.as_string(maxheaderlen=78), """\
1415List: List-Unsubscribe:
1416 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
1417        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
1418List: List-Unsubscribe:
1419 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
1420        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
1421
1422""")
1423
1424    def test_long_rfc2047_header_with_embedded_fws(self):
1425        h = Header(textwrap.dedent("""\
1426            We're going to pretend this header is in a non-ascii character set
1427            \tto see if line wrapping with encoded words and embedded
1428               folding white space works"""),
1429                   charset='utf-8',
1430                   header_name='Test')
1431        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1432            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1433             =?utf-8?q?cter_set?=
1434             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1435             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1436
1437
1438
1439# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # Shared fixture: one From header and a body whose first line starts
        # with "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten_fixture(self, mangle):
        # Serialize self.msg through a text Generator with the given
        # mangle_from_ setting and return the produced text.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(self._flatten_fixture(True), expected)

    def test_dont_mangle_from(self):
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(self._flatten_fixture(False), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # Both the text before the first boundary and the text after the
        # closing boundary contain a "From " line; two mangled lines are
        # expected in the output.
        parsed = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(parsed)
        mangled = [line for line in out.getvalue().split('\n')
                   if line.startswith('>From ')]
        self.assertEqual(len(mangled), 2)

    def test_mangled_from_with_bad_bytes(self):
        # The body line carries non-ASCII utf-8 bytes under an 8bit CTE.
        headers = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

        """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), headers + b'>From R\xc3\xb6lli\n')

    def test_multipart_with_bad_bytes_in_cte(self):
        # bpo30835
        # There is no assertion here: the test passes as long as parsing a
        # multipart whose Content-Transfer-Encoding value is non-ASCII does
        # not raise.
        raw = textwrap.dedent("""\
            From: aperson@example.com
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        email.message_from_bytes(raw)
1516
1517
1518# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def _make_audio(self, ext):
        # Read the sndhdr.<ext> fixture and wrap its raw bytes in a MIMEAudio
        # part; both are kept on the instance for the tests to inspect.
        with openfile(f'sndhdr.{ext}', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # Fixture extension -> subtype MIMEAudio is expected to guess.
        expected_subtypes = {
            'aifc': 'x-aiff',
            'aiff': 'x-aiff',
            'wav': 'x-wav',
            'au': 'basic',
        }
        for ext, subtype in expected_subtypes.items():
            # subTest reports the failing extension and lets the remaining
            # formats run even if one of them fails.
            with self.subTest(ext=ext):
                self._make_audio(ext)
                self.assertEqual(self._au.get_content_type(),
                                 f'audio/{subtype}')

    def test_encoding(self):
        # The stored payload must base64-decode back to the fixture bytes.
        self._make_audio('au')
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype argument overrides guessing.
        self._make_audio('au')
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        self._make_audio('au')
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='sndhdr.au')
        eq(self._au['content-disposition'],
           'attachment; filename="sndhdr.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'sndhdr.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'sndhdr.au')
        # `missing` is a unique object so assertIs can prove the failobj
        # itself was handed back.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param(
            'foo', failobj=missing,
            header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'), missing)
1567
1568
1569
1570# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def _make_image(self, ext):
        # Read the python.<ext> fixture and wrap its raw bytes in a MIMEImage
        # part; both are kept on the instance for the tests to inspect.
        with openfile(f'python.{ext}', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # Fixture extension -> expected image subtype.  None means the
        # expected subtype is the extension itself.
        expected_subtypes = {
            'bmp': None,
            'exr': None,
            'gif': None,
            'jpg': 'jpeg',
            'pbm': None,
            'pgm': None,
            'png': None,
            'ppm': None,
            'ras': 'rast',
            'sgi': 'rgb',
            'tiff': None,
            'webp': None,
            'xbm': None,
        }
        for ext, subtype in expected_subtypes.items():
            # subTest reports the failing extension and lets the remaining
            # formats run even if one of them fails.
            with self.subTest(ext=ext):
                self._make_image(ext)
                if subtype is None:
                    subtype = ext
                self.assertEqual(self._im.get_content_type(),
                                 f'image/{subtype}')

    def test_encoding(self):
        # The stored payload must base64-decode back to the fixture bytes.
        self._make_image('gif')
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype argument overrides guessing.
        self._make_image('gif')
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        self._make_image('gif')
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # `missing` is a unique object so assertIs can prove the failobj
        # itself was handed back.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'), missing)
1627
1628
1629# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def _roundtrip(self, msg):
        # Flatten msg with BytesGenerator and parse the resulting wire form
        # back into a new message object.
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        return email.message_from_bytes(wire.getvalue())

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        as_text = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # The source message still reads the same after being flattened, and
        # the reparsed copy exposes the same string payload.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        as_text = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        encoded = '=FA=FB=FC=FD=FE=FF=20'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        encoded = '+vv8/f7/\n'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1719
1720
1721# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # Text part built with default arguments, shared by the accessor
        # tests below.
        self._msg = MIMEText('hello there')

    def test_types(self):
        part = self._msg
        self.assertEqual(part.get_content_type(), 'text/plain')
        self.assertEqual(part.get_param('charset'), 'us-ascii')
        # A unique object lets assertIs prove the failobj itself came back.
        sentinel = []
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('charset', sentinel, header='foobar'), sentinel)

    def test_payload(self):
        part = self._msg
        self.assertEqual(part.get_payload(), 'hello there')
        self.assertFalse(part.is_multipart())

    def test_charset(self):
        by_name = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(by_name.get_charset().input_charset, 'us-ascii')
        self.assertEqual(by_name['content-type'],
                         'text/plain; charset="us-ascii"')
        # Also accept a Charset instance
        utf8 = Charset('utf-8')
        utf8.body_encoding = None
        by_instance = MIMEText('hello there', _charset=utf8)
        self.assertEqual(by_instance.get_charset().input_charset, 'utf-8')
        self.assertEqual(by_instance['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(by_instance.get_payload(), 'hello there')

    def test_7bit_input(self):
        part = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(part.get_charset().input_charset, 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        part = MIMEText('hello there')
        self.assertEqual(part.get_charset(), 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', part.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        part = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(part.get_charset().output_charset, 'utf-8')
        self.assertEqual(part['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(part.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        with self.assertRaises(UnicodeEncodeError):
            MIMEText(cyrillic)
1778
1779
1780
1781# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a text intro and a GIF
        # attachment; test_hierarchy inspects it through self._msg,
        # self._txt and self._im.
        with openfile('python.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last field is tm_isdst: pick the standard or the DST offset.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/time.altzone are seconds *west* of UTC, so a positive
        # value is a negative UTC offset.
        sign = '-' if tzsecs > 0 else '+'
        # Render the magnitude as HHMM.  Working from abs() keeps a second
        # minus sign out of the result for zones east of UTC, and divmod
        # handles offsets that are not a whole number of hours.
        offset_hours, offset_minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, offset_hours, offset_minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _new_outer(self):
        # Return a fresh multipart/mixed MIMEBase carrying the Subject, To
        # and From headers (in that order) used by the serialization tests.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-serializing this text must reproduce it exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--
''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        # Each alternative is a message/external-body wrapping exactly one
        # text/plain message.
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type: the body is expected
        # to come back out verbatim.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="    XXXX"

--    XXXX
Content-Type: text/plain


--    XXXX
Content-Type: text/plain

--    XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), '    XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the input, with no
        # newline after it.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

    def test_mimebase_default_policy(self):
        m = MIMEBase('multipart', 'mixed')
        self.assertIs(m.policy, email.policy.compat32)

    def test_mimebase_custom_policy(self):
        m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
        self.assertIs(m.policy, email.policy.default)
2165
2166# Test some badly formatted messages
2167class TestNonConformant(TestEmailBase):
2168
2169    def test_parse_missing_minor_type(self):
2170        eq = self.assertEqual
2171        msg = self._msgobj('msg_14.txt')
2172        eq(msg.get_content_type(), 'text/plain')
2173        eq(msg.get_content_maintype(), 'text')
2174        eq(msg.get_content_subtype(), 'plain')
2175
2176    # test_defect_handling
2177    def test_same_boundary_inner_outer(self):
2178        msg = self._msgobj('msg_15.txt')
2179        # XXX We can probably eventually do better
2180        inner = msg.get_payload(0)
2181        self.assertTrue(hasattr(inner, 'defects'))
2182        self.assertEqual(len(inner.defects), 1)
2183        self.assertIsInstance(inner.defects[0],
2184                              errors.StartBoundaryNotFoundDefect)
2185
2186    # test_defect_handling
2187    def test_multipart_no_boundary(self):
2188        msg = self._msgobj('msg_25.txt')
2189        self.assertIsInstance(msg.get_payload(), str)
2190        self.assertEqual(len(msg.defects), 2)
2191        self.assertIsInstance(msg.defects[0],
2192                              errors.NoBoundaryInMultipartDefect)
2193        self.assertIsInstance(msg.defects[1],
2194                              errors.MultipartInvariantViolationDefect)
2195
    # Template for a two-part multipart/mixed message.  The single '{}'
    # placeholder sits right after the Content-Type header, so the tests
    # below can str.format() in either nothing or an extra
    # "\nContent-Transfer-Encoding: ..." header line.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)
2218
2219    # test_defect_handling
2220    def test_multipart_invalid_cte(self):
2221        msg = self._str_msg(
2222            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
2223        self.assertEqual(len(msg.defects), 1)
2224        self.assertIsInstance(msg.defects[0],
2225            errors.InvalidMultipartContentTransferEncodingDefect)
2226
2227    # test_defect_handling
2228    def test_multipart_no_cte_no_defect(self):
2229        msg = self._str_msg(self.multipart_msg.format(''))
2230        self.assertEqual(len(msg.defects), 0)
2231
2232    # test_defect_handling
2233    def test_multipart_valid_cte_no_defect(self):
2234        for cte in ('7bit', '8bit', 'BINary'):
2235            msg = self._str_msg(
2236                self.multipart_msg.format(
2237                    "\nContent-Transfer-Encoding: {}".format(cte)))
2238            self.assertEqual(len(msg.defects), 0)
2239
    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
2241    def test_invalid_content_type(self):
2242        eq = self.assertEqual
2243        neq = self.ndiffAssertEqual
2244        msg = Message()
2245        # RFC 2045, $5.2 says invalid yields text/plain
2246        msg['Content-Type'] = 'text'
2247        eq(msg.get_content_maintype(), 'text')
2248        eq(msg.get_content_subtype(), 'plain')
2249        eq(msg.get_content_type(), 'text/plain')
2250        # Clear the old value and try something /really/ invalid
2251        del msg['content-type']
2252        msg['Content-Type'] = 'foo'
2253        eq(msg.get_content_maintype(), 'text')
2254        eq(msg.get_content_subtype(), 'plain')
2255        eq(msg.get_content_type(), 'text/plain')
2256        # Still, make sure that the message is idempotently generated
2257        s = StringIO()
2258        g = Generator(s)
2259        g.flatten(msg)
2260        neq(s.getvalue(), 'Content-Type: foo\n\n')
2261
2262    def test_no_start_boundary(self):
2263        eq = self.ndiffAssertEqual
2264        msg = self._msgobj('msg_31.txt')
2265        eq(msg.get_payload(), """\
2266--BOUNDARY
2267Content-Type: text/plain
2268
2269message 1
2270
2271--BOUNDARY
2272Content-Type: text/plain
2273
2274message 2
2275
2276--BOUNDARY--
2277""")
2278
2279    def test_no_separating_blank_line(self):
2280        eq = self.ndiffAssertEqual
2281        msg = self._msgobj('msg_35.txt')
2282        eq(msg.as_string(), """\
2283From: aperson@dom.ain
2284To: bperson@dom.ain
2285Subject: here's something interesting
2286
2287counter to RFC 2822, there's no separating newline here
2288""")
2289
2290    # test_defect_handling
2291    def test_lying_multipart(self):
2292        msg = self._msgobj('msg_41.txt')
2293        self.assertTrue(hasattr(msg, 'defects'))
2294        self.assertEqual(len(msg.defects), 2)
2295        self.assertIsInstance(msg.defects[0],
2296                              errors.NoBoundaryInMultipartDefect)
2297        self.assertIsInstance(msg.defects[1],
2298                              errors.MultipartInvariantViolationDefect)
2299
2300    # test_defect_handling
2301    def test_missing_start_boundary(self):
2302        outer = self._msgobj('msg_42.txt')
2303        # The message structure is:
2304        #
2305        # multipart/mixed
2306        #    text/plain
2307        #    message/rfc822
2308        #        multipart/mixed [*]
2309        #
2310        # [*] This message is missing its start boundary
2311        bad = outer.get_payload(1).get_payload(0)
2312        self.assertEqual(len(bad.defects), 1)
2313        self.assertIsInstance(bad.defects[0],
2314                              errors.StartBoundaryNotFoundDefect)
2315
2316    # test_defect_handling
2317    def test_first_line_is_continuation_header(self):
2318        eq = self.assertEqual
2319        m = ' Line 1\nSubject: test\n\nbody'
2320        msg = email.message_from_string(m)
2321        eq(msg.keys(), ['Subject'])
2322        eq(msg.get_payload(), 'body')
2323        eq(len(msg.defects), 1)
2324        self.assertDefectsEqual(msg.defects,
2325                                 [errors.FirstHeaderLineIsContinuationDefect])
2326        eq(msg.defects[0].line, ' Line 1\n')
2327
2328    # test_defect_handling
2329    def test_missing_header_body_separator(self):
2330        # Our heuristic if we see a line that doesn't look like a header (no
2331        # leading whitespace but no ':') is to assume that the blank line that
2332        # separates the header from the body is missing, and to stop parsing
2333        # headers and start parsing the body.
2334        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
2335        self.assertEqual(msg.keys(), ['Subject'])
2336        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
2337        self.assertDefectsEqual(msg.defects,
2338                                [errors.MissingHeaderBodySeparatorDefect])
2339
2340
2341# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # decode_header() is expected to split a raw header value into
    # (bytes, charset) chunks, with charset None for text that was not an
    # encoded word.  Several tests also feed the chunks back through
    # make_header() to check the round trip.

    def _assert_decodes_to(self, raw, chunks):
        # Shared check: decode_header(raw) must equal `chunks` exactly.
        self.assertEqual(decode_header(raw), chunks)

    def test_rfc2047_multiline(self):
        # The same encoded word appears on both lines of a folded value.
        word = '=?mac-iceland?q?r=8Aksm=9Arg=8Cs?='
        raw = 'Re: ' + word + ' baz\n foo bar ' + word
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding with a 76 column limit splits the last encoded word
        # across two lines.
        self.ndiffAssertEqual(
            header.encode(maxlinelen=76),
            'Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar '
            '=?mac-iceland?q?r=8Aksm?=\n'
            ' =?mac-iceland?q?=9Arg=8Cs?=')

    def test_whitespace_keeper_unicode(self):
        # The space between the encoded word and the plain text that follows
        # must survive decoding.
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b' Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        # Plain text and base64 encoded words alternate; the spaces around
        # the encoded words belong to the plain chunks.
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= '
               'jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The ', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b' jumped over the ', None),
            (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words butted directly against plain text are still found.
        self._assert_decodes_to(
            'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord',
            [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
             (b'rg', None), (b'\xe5', 'iso-8859-1'),
             (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        # Same words as above, separated by spaces; the spaces stay with the
        # plain chunks.
        self._assert_decodes_to(
            'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord',
            [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
             (b' rg ', None), (b'\xe5', 'iso-8859-1'),
             (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        # Base64 encoded words must decode whether their '=' padding is
        # complete, partial or absent.  Only payloads that decode to whole
        # bytes are listed.
        template = '=?iso-8859-1?B?%s?='
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for payload, decoded in cases:
            # One subTest per payload: a failure report names the payload
            # and the remaining cases are still exercised.
            with self.subTest(payload=payload):
                self.assertEqual(decode_header(template % payload),
                                 [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004: '=zz' is not a valid escape and is passed through
        # literally, while the valid '=e9' is decoded.
        self._assert_decodes_to(
            '=?iso-8859-1?Q?andr=e9=zz?=',
            [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        self._assert_decodes_to(
            '(=?ISO-8859-1?Q?a?=)',
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        self._assert_decodes_to(
            '(=?ISO-8859-1?Q?a?= b)',
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        self._assert_decodes_to(
            '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)',
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047
        self._assert_decodes_to(
            '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)',
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        self._assert_decodes_to(
            '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)',
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        self._assert_decodes_to(
            '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)',
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        self._assert_decodes_to(
            '(=?ISO-8859-1?Q?a_b?=)',
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047: adjacent words in different
        # charsets stay separate chunks.
        raw = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self._assert_decodes_to(
            raw,
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        # Re-encoding reproduces the input apart from letter case.
        self.assertEqual(make_header(decode_header(raw)).encode(),
                         raw.lower())
        self.assertEqual(str(make_header(decode_header(raw))), '(a b)')

    def test_multiline_header(self):
        # An encoded display name followed by an address on a folded line.
        raw = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self._assert_decodes_to(
            raw,
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        # Re-encoding gives the input with the line break removed.
        self.assertEqual(make_header(decode_header(raw)).encode(),
                         ''.join(raw.splitlines()))
        self.assertEqual(str(make_header(decode_header(raw))),
                         '"Müller T" <T.Mueller@xxx.com>')
2468
2469
2470# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        # Keep the raw text of msg_11.txt available on the instance.
        with openfile('msg_11.txt', encoding="utf-8") as fp:
            self._text = fp.read()

    def _build_two_part_message(self, epilogue):
        # Build a plain Message that is turned into a multipart/mixed
        # container with a fixed preamble, the given epilogue and two text
        # parts ('One' and 'Two').
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = epilogue
        first = MIMEText('One')
        second = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(first)
        msg.attach(second)
        return msg

    def _check_digest_default_types(self, filename):
        # Parse `filename` and check its first two parts: each must report
        # message/rfc822 as both default and actual type, and the message
        # each one encloses must report text/plain for both.
        with openfile(filename, encoding="utf-8") as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        self.assertEqual(container1.get_default_type(), 'message/rfc822')
        self.assertEqual(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        self.assertEqual(container2.get_default_type(), 'message/rfc822')
        self.assertEqual(container2.get_content_type(), 'message/rfc822')
        enclosed1 = container1.get_payload(0)
        self.assertEqual(enclosed1.get_default_type(), 'text/plain')
        self.assertEqual(enclosed1.get_content_type(), 'text/plain')
        enclosed2 = container2.get_payload(0)
        self.assertEqual(enclosed2.get_default_type(), 'text/plain')
        self.assertEqual(enclosed2.get_content_type(), 'text/plain')

    def test_type_error(self):
        # MIMEMessage must reject a str argument.
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        # The payload is a one-element list holding the very object passed
        # in, not a copy.
        parts = wrapper.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIs(enclosed, inner)
        self.assertEqual(enclosed['subject'], subject)

    def test_bad_multipart(self):
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        # Attaching another message to the wrapper must be refused.
        with self.assertRaises(errors.MultipartConversionError):
            wrapper.attach(second)

    def test_generate(self):
        # First craft the message to be encapsulated
        inner = Message()
        inner['Subject'] = 'An enclosed message'
        inner.set_payload('Here is the body of the message.\n')
        wrapper = MIMEMessage(inner)
        wrapper['Subject'] = 'The enclosing message'
        out = StringIO()
        Generator(out).flatten(wrapper)
        # Wrapper headers, blank line, then the enclosed message in full.
        self.assertEqual(out.getvalue(), (
            'Content-Type: message/rfc822\n'
            'MIME-Version: 1.0\n'
            'Subject: The enclosing message\n'
            '\n'
            'Subject: An enclosed message\n'
            '\n'
            'Here is the body of the message.\n'
        ))

    def test_parse_message_rfc822(self):
        msg = self._msgobj('msg_11.txt')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')
        parts = msg.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed['subject'], 'An enclosed message')
        self.assertEqual(enclosed.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        self.assertEqual(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(len(msg.get_payload()), 3)

        # Subpart 1 is a text/plain, human readable section
        readable = msg.get_payload(0)
        self.assertEqual(readable.get_content_type(), 'text/plain')
        self.assertEqual(readable.get_payload(), (
            'This report relates to a message you sent with the following '
            'header fields:\n'
            '\n'
            '  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>\n'
            '  Date: Sun, 23 Sep 2001 20:10:55 -0700\n'
            '  From: "Ian T. Henry" <henryi@oxy.edu>\n'
            '  To: SoCal Raves <scr@socal-raves.org>\n'
            '  Subject: [scr] yeah for Ians!!\n'
            '\n'
            'Your message cannot be delivered to the following recipients:\n'
            '\n'
            '  Recipient address: jangel1@cougar.noc.ucla.edu\n'
            '  Reason: recipient reached disk quota\n'
            '\n'
        ))

        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested
        # Message objects.
        status = msg.get_payload(1)
        self.assertEqual(status.get_content_type(), 'message/delivery-status')
        self.assertEqual(len(status.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        per_message = status.get_payload(0)
        self.assertIsInstance(per_message, Message)
        self.assertEqual(per_message['original-envelope-id'],
                         '0GK500B4HD0888@cougar.noc.ucla.edu')
        self.assertEqual(
            per_message.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertEqual(
            per_message.get_param('nsd', header='reporting-mta'), None)
        per_recipient = status.get_payload(1)
        self.assertIsInstance(per_recipient, Message)
        self.assertEqual(per_recipient['action'], 'failed')
        self.assertEqual(
            per_recipient.get_params(header='original-recipient'),
            [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        self.assertEqual(
            per_recipient.get_param('rfc822', header='final-recipient'), '')

        # Subpart 3 is the original message
        original = msg.get_payload(2)
        self.assertEqual(original.get_content_type(), 'message/rfc822')
        parts = original.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        self.assertEqual(enclosed['message-id'],
                         '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built message with an epilogue must flatten to exactly the
        # text stored in msg_21.txt.
        with openfile('msg_21.txt', encoding="utf-8") as fp:
            expected = fp.read()
        msg = self._build_two_part_message('End of MIME message\n')
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), expected)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty;
        # the flattened text must still put the first boundary on its own
        # line and end right after the closing boundary line.
        msg = self._build_two_part_message('')
        self.ndiffAssertEqual(msg.as_string(), (
            'From: aperson@dom.ain\n'
            'To: bperson@dom.ain\n'
            'Subject: Test\n'
            'Content-Type: multipart/mixed; boundary="BOUNDARY"\n'
            '\n'
            'MIME message\n'
            '--BOUNDARY\n'
            'Content-Type: text/plain; charset="us-ascii"\n'
            'MIME-Version: 1.0\n'
            'Content-Transfer-Encoding: 7bit\n'
            '\n'
            'One\n'
            '--BOUNDARY\n'
            'Content-Type: text/plain; charset="us-ascii"\n'
            'MIME-Version: 1.0\n'
            'Content-Transfer-Encoding: 7bit\n'
            '\n'
            'Two\n'
            '--BOUNDARY--\n'
        ))

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        text1 = MIMEText('message 1\n')
        text2 = MIMEText('message 2\n')
        wrapped1 = MIMEMessage(text1)
        wrapped2 = MIMEMessage(text2)
        container.attach(wrapped1)
        container.attach(wrapped2)

        def assert_wrappers_are_rfc822():
            # Both wrappers must report message/rfc822 as actual and default
            # type.
            self.assertEqual(wrapped1.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapped1.get_default_type(), 'message/rfc822')
            self.assertEqual(wrapped2.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapped2.get_default_type(), 'message/rfc822')

        assert_wrappers_are_rfc822()
        self.ndiffAssertEqual(container.as_string(0), (
            'Content-Type: multipart/digest; boundary="BOUNDARY"\n'
            'MIME-Version: 1.0\n'
            '\n'
            '--BOUNDARY\n'
            'Content-Type: message/rfc822\n'
            'MIME-Version: 1.0\n'
            '\n'
            'Content-Type: text/plain; charset="us-ascii"\n'
            'MIME-Version: 1.0\n'
            'Content-Transfer-Encoding: 7bit\n'
            '\n'
            'message 1\n'
            '\n'
            '--BOUNDARY\n'
            'Content-Type: message/rfc822\n'
            'MIME-Version: 1.0\n'
            '\n'
            'Content-Type: text/plain; charset="us-ascii"\n'
            'MIME-Version: 1.0\n'
            'Content-Transfer-Encoding: 7bit\n'
            '\n'
            'message 2\n'
            '\n'
            '--BOUNDARY--\n'
        ))
        # Strip the wrappers' own headers: the reported types must not
        # change, and the flattened parts then start with an empty header
        # block.
        del wrapped1['content-type']
        del wrapped1['mime-version']
        del wrapped2['content-type']
        del wrapped2['mime-version']
        assert_wrappers_are_rfc822()
        self.ndiffAssertEqual(container.as_string(0), (
            'Content-Type: multipart/digest; boundary="BOUNDARY"\n'
            'MIME-Version: 1.0\n'
            '\n'
            '--BOUNDARY\n'
            '\n'
            'Content-Type: text/plain; charset="us-ascii"\n'
            'MIME-Version: 1.0\n'
            'Content-Transfer-Encoding: 7bit\n'
            '\n'
            'message 1\n'
            '\n'
            '--BOUNDARY\n'
            '\n'
            'Content-Type: text/plain; charset="us-ascii"\n'
            'MIME-Version: 1.0\n'
            'Content-Transfer-Encoding: 7bit\n'
            '\n'
            'message 2\n'
            '\n'
            '--BOUNDARY--\n'
        ))

    def test_mime_attachments_in_constructor(self):
        # Parts handed to the constructor show up as the payload, in order.
        first = MIMEText('')
        second = MIMEText('')
        msg = MIMEMultipart(_subparts=(first, second))
        self.assertEqual(len(msg.get_payload()), 2)
        self.assertEqual(msg.get_payload(0), first)
        self.assertEqual(msg.get_payload(1), second)

    def test_default_multipart_constructor(self):
        # With no arguments the result must already count as multipart.
        self.assertTrue(MIMEMultipart().is_multipart())

    def test_multipart_default_policy(self):
        # Without an explicit policy, assigning the same header twice keeps
        # both values.
        msg = MIMEMultipart()
        msg['To'] = 'a@b.com'
        msg['To'] = 'c@d.com'
        self.assertEqual(msg.get_all('to'), ['a@b.com', 'c@d.com'])

    def test_multipart_custom_policy(self):
        # With email.policy.default a second To header is rejected.
        msg = MIMEMultipart(policy=email.policy.default)
        msg['To'] = 'a@b.com'
        with self.assertRaises(ValueError) as caught:
            msg['To'] = 'c@d.com'
        self.assertEqual(str(caught.exception),
                         'There may be at most 1 To headers in a message')
2779
2780
2781# Test the NonMultipart class
class TestNonMultipart(TestEmailBase):
    def test_nonmultipart_is_not_multipart(self):
        # A freshly built text/plain MIMENonMultipart is not multipart.
        part = MIMENonMultipart('text', 'plain')
        self.assertFalse(part.is_multipart())

    def test_attach_raises_exception(self):
        # attach() on a MIMENonMultipart must be refused.
        extra = Message()
        extra['Subject'] = 'subpart 1'
        part = MIMENonMultipart('text', 'plain')
        with self.assertRaises(errors.MultipartConversionError):
            part.attach(extra)
2792
2793
2794# A general test of parser->model->generator idempotency.  IOW, read a message
2795# in, parse it into a message object tree, then without touching the tree,
2796# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
2798# contain a changed date.
class TestIdempotent(TestEmailBase):

    # Line separator expected in the parsed preamble, epilogue and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, original text) for the named data file.
        with openfile(filename, encoding="utf-8") as fp:
            text = fp.read()
        return email.message_from_string(text), text

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten `msg` and require the output to equal `text`.
        # maxheaderlen=0 is passed so that long headers are not re-wrapped.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        msg, text = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        # Entry 0 of get_params() is skipped; the charset is entry 1.
        self.assertEqual(msg.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        self.assertEqual(msg.preamble, None)
        self.assertEqual(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        # The type is reported as text/plain but there are no parameters.
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_params(), None)
        self.assertEqual(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    # Each of the following tests parses one data file and requires the
    # regenerated text to equal the file's text.

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This one is regenerated with the Unix-From line included.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        msg, _ = self._msgobj('msg_05.txt')
        self.assertEqual(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        self.assertEqual(params['report-type'], 'delivery-status')
        self.assertEqual(params['boundary'],
                         'D1690A7AC1.996856090/mail.example.com')
        self.assertEqual(
            msg.preamble,
            'This is a MIME-encapsulated message.' + self.linesep)
        self.assertEqual(msg.epilogue, self.linesep)
        self.assertEqual(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect: the first two are
        # text/plain with the same one-line body.
        for index in (0, 1):
            part = msg.get_payload(index)
            self.assertEqual(part.get_content_type(), 'text/plain')
            self.assertEqual(part.get_payload(),
                             'Yadda yadda yadda' + self.linesep)
        # The third is a message/rfc822 wrapper around one more message
        # with that body.
        wrapper = msg.get_payload(2)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        self.assertIsInstance(wrapper, Message)
        parts = wrapper.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_payload(),
                         'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        self.assertEqual(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and
        # that that submessage has a type of text/plain
        parts = msg.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        # Its body is a str consisting of a single line separator.
        self.assertIsInstance(enclosed.get_payload(), str)
        self.assertEqual(enclosed.get_payload(), self.linesep)
2954
2955
2956
2957# Test various other bits of the package's functionality
2958class TestMiscellaneous(TestEmailBase):
2959    def test_message_from_string(self):
2960        with openfile('msg_01.txt', encoding="utf-8") as fp:
2961            text = fp.read()
2962        msg = email.message_from_string(text)
2963        s = StringIO()
2964        # Don't wrap/continue long headers since we're trying to test
2965        # idempotency.
2966        g = Generator(s, maxheaderlen=0)
2967        g.flatten(msg)
2968        self.assertEqual(text, s.getvalue())
2969
2970    def test_message_from_file(self):
2971        with openfile('msg_01.txt', encoding="utf-8") as fp:
2972            text = fp.read()
2973            fp.seek(0)
2974            msg = email.message_from_file(fp)
2975            s = StringIO()
2976            # Don't wrap/continue long headers since we're trying to test
2977            # idempotency.
2978            g = Generator(s, maxheaderlen=0)
2979            g.flatten(msg)
2980            self.assertEqual(text, s.getvalue())
2981
2982    def test_message_from_string_with_class(self):
2983        with openfile('msg_01.txt', encoding="utf-8") as fp:
2984            text = fp.read()
2985
2986        # Create a subclass
2987        class MyMessage(Message):
2988            pass
2989
2990        msg = email.message_from_string(text, MyMessage)
2991        self.assertIsInstance(msg, MyMessage)
2992        # Try something more complicated
2993        with openfile('msg_02.txt', encoding="utf-8") as fp:
2994            text = fp.read()
2995        msg = email.message_from_string(text, MyMessage)
2996        for subpart in msg.walk():
2997            self.assertIsInstance(subpart, MyMessage)
2998
2999    def test_message_from_file_with_class(self):
3000        # Create a subclass
3001        class MyMessage(Message):
3002            pass
3003
3004        with openfile('msg_01.txt', encoding="utf-8") as fp:
3005            msg = email.message_from_file(fp, MyMessage)
3006        self.assertIsInstance(msg, MyMessage)
3007        # Try something more complicated
3008        with openfile('msg_02.txt', encoding="utf-8") as fp:
3009            msg = email.message_from_file(fp, MyMessage)
3010        for subpart in msg.walk():
3011            self.assertIsInstance(subpart, MyMessage)
3012
3013    def test_custom_message_does_not_require_arguments(self):
3014        class MyMessage(Message):
3015            def __init__(self):
3016                super().__init__()
3017        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
3018        self.assertIsInstance(msg, MyMessage)
3019
3020    def test__all__(self):
3021        module = __import__('email')
3022        self.assertEqual(sorted(module.__all__), [
3023            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
3024            'generator', 'header', 'iterators', 'message',
3025            'message_from_binary_file', 'message_from_bytes',
3026            'message_from_file', 'message_from_string', 'mime', 'parser',
3027            'quoprimime', 'utils',
3028            ])
3029
3030    def test_formatdate(self):
3031        now = time.time()
3032        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
3033                         time.gmtime(now)[:6])
3034
3035    def test_formatdate_localtime(self):
3036        now = time.time()
3037        self.assertEqual(
3038            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
3039            time.localtime(now)[:6])
3040
3041    def test_formatdate_usegmt(self):
3042        now = time.time()
3043        self.assertEqual(
3044            utils.formatdate(now, localtime=False),
3045            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
3046        self.assertEqual(
3047            utils.formatdate(now, localtime=False, usegmt=True),
3048            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
3049
3050    # parsedate and parsedate_tz will become deprecated interfaces someday
3051    def test_parsedate_returns_None_for_invalid_strings(self):
3052        # See also test_parsedate_to_datetime_with_invalid_raises_valueerror
3053        # in test_utils.
3054        invalid_dates = [
3055            '',
3056            ' ',
3057            '0',
3058            'A Complete Waste of Time',
3059            'Wed, 3 Apr 2002 12.34.56.78+0800',
3060            '17 June , 2022',
3061            'Friday, -Nov-82 16:14:55 EST',
3062            'Friday, Nov--82 16:14:55 EST',
3063            'Friday, 19-Nov- 16:14:55 EST',
3064        ]
3065        for dtstr in invalid_dates:
3066            with self.subTest(dtstr=dtstr):
3067                self.assertIsNone(utils.parsedate(dtstr))
3068                self.assertIsNone(utils.parsedate_tz(dtstr))
3069        # Not a part of the spec but, but this has historically worked:
3070        self.assertIsNone(utils.parsedate(None))
3071        self.assertIsNone(utils.parsedate_tz(None))
3072
3073    def test_parsedate_compact(self):
3074        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26 +0800'),
3075                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3076        # The FWS after the comma is optional
3077        self.assertEqual(utils.parsedate_tz('Wed,3 Apr 2002 14:58:26 +0800'),
3078                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3079        # The comma is optional
3080        self.assertEqual(utils.parsedate_tz('Wed 3 Apr 2002 14:58:26 +0800'),
3081                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3082
3083    def test_parsedate_no_dayofweek(self):
3084        eq = self.assertEqual
3085        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
3086           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3087        eq(utils.parsedate_tz('February 5, 2003 13:47:26 -0800'),
3088           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3089
3090    def test_parsedate_no_space_before_positive_offset(self):
3091        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
3092           (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3093
3094    def test_parsedate_no_space_before_negative_offset(self):
3095        # Issue 1155362: we already handled '+' for this case.
3096        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
3097           (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
3098
3099    def test_parsedate_accepts_time_with_dots(self):
3100        eq = self.assertEqual
3101        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
3102           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3103        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
3104           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
3105
3106    def test_parsedate_rfc_850(self):
3107        self.assertEqual(utils.parsedate_tz('Friday, 19-Nov-82 16:14:55 EST'),
3108           (1982, 11, 19, 16, 14, 55, 0, 1, -1, -18000))
3109
3110    def test_parsedate_no_seconds(self):
3111        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58 +0800'),
3112                         (2002, 4, 3, 14, 58, 0, 0, 1, -1, 28800))
3113
3114    def test_parsedate_dot_time_delimiter(self):
3115        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14.58.26 +0800'),
3116                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3117        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14.58 +0800'),
3118                         (2002, 4, 3, 14, 58, 0, 0, 1, -1, 28800))
3119
3120    def test_parsedate_acceptable_to_time_functions(self):
3121        eq = self.assertEqual
3122        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
3123        t = int(time.mktime(timetup))
3124        eq(time.localtime(t)[:6], timetup[:6])
3125        eq(int(time.strftime('%Y', timetup)), 2003)
3126        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
3127        t = int(time.mktime(timetup[:9]))
3128        eq(time.localtime(t)[:6], timetup[:6])
3129        eq(int(time.strftime('%Y', timetup[:9])), 2003)
3130
3131    def test_mktime_tz(self):
3132        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3133                                          -1, -1, -1, 0)), 0)
3134        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3135                                          -1, -1, -1, 1234)), -1234)
3136
3137    def test_parsedate_y2k(self):
3138        """Test for parsing a date with a two-digit year.
3139
3140        Parsing a date with a two-digit year should return the correct
3141        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
3142        obsoletes RFC822) requires four-digit years.
3143
3144        """
3145        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
3146                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
3147        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
3148                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
3149
3150    def test_parseaddr_empty(self):
3151        self.assertEqual(utils.parseaddr('<>'), ('', ''))
3152        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
3153
3154    def test_parseaddr_multiple_domains(self):
3155        self.assertEqual(
3156            utils.parseaddr('a@b@c'),
3157            ('', '')
3158        )
3159        self.assertEqual(
3160            utils.parseaddr('a@b.c@c'),
3161            ('', '')
3162        )
3163        self.assertEqual(
3164            utils.parseaddr('a@172.17.0.1@c'),
3165            ('', '')
3166        )
3167
3168    def test_noquote_dump(self):
3169        self.assertEqual(
3170            utils.formataddr(('A Silly Person', 'person@dom.ain')),
3171            'A Silly Person <person@dom.ain>')
3172
3173    def test_escape_dump(self):
3174        self.assertEqual(
3175            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
3176            r'"A (Very) Silly Person" <person@dom.ain>')
3177        self.assertEqual(
3178            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
3179            ('A (Very) Silly Person', 'person@dom.ain'))
3180        a = r'A \(Special\) Person'
3181        b = 'person@dom.ain'
3182        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3183
3184    def test_escape_backslashes(self):
3185        self.assertEqual(
3186            utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
3187            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
3188        a = r'Arthur \Backslash\ Foobar'
3189        b = 'person@dom.ain'
3190        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3191
3192    def test_quotes_unicode_names(self):
3193        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3194        name = "H\u00e4ns W\u00fcrst"
3195        addr = 'person@dom.ain'
3196        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3197        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
3198        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
3199        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
3200            latin1_quopri)
3201
3202    def test_accepts_any_charset_like_object(self):
3203        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3204        name = "H\u00e4ns W\u00fcrst"
3205        addr = 'person@dom.ain'
3206        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3207        foobar = "FOOBAR"
3208        class CharsetMock:
3209            def header_encode(self, string):
3210                return foobar
3211        mock = CharsetMock()
3212        mock_expected = "%s <%s>" % (foobar, addr)
3213        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
3214        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
3215            utf8_base64)
3216
3217    def test_invalid_charset_like_object_raises_error(self):
3218        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3219        name = "H\u00e4ns W\u00fcrst"
3220        addr = 'person@dom.ain'
3221        # An object without a header_encode method:
3222        bad_charset = object()
3223        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
3224            bad_charset)
3225
3226    def test_unicode_address_raises_error(self):
3227        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3228        addr = 'pers\u00f6n@dom.in'
3229        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
3230        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
3231
3232    def test_name_with_dot(self):
3233        x = 'John X. Doe <jxd@example.com>'
3234        y = '"John X. Doe" <jxd@example.com>'
3235        a, b = ('John X. Doe', 'jxd@example.com')
3236        self.assertEqual(utils.parseaddr(x), (a, b))
3237        self.assertEqual(utils.parseaddr(y), (a, b))
3238        # formataddr() quotes the name if there's a dot in it
3239        self.assertEqual(utils.formataddr((a, b)), y)
3240
3241    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3242        # issue 10005.  Note that in the third test the second pair of
3243        # backslashes is not actually a quoted pair because it is not inside a
3244        # comment or quoted string: the address being parsed has a quoted
3245        # string containing a quoted backslash, followed by 'example' and two
3246        # backslashes, followed by another quoted string containing a space and
3247        # the word 'example'.  parseaddr copies those two backslashes
3248        # literally.  Per rfc5322 this is not technically correct since a \ may
3249        # not appear in an address outside of a quoted string.  It is probably
3250        # a sensible Postel interpretation, though.
3251        eq = self.assertEqual
3252        eq(utils.parseaddr('""example" example"@example.com'),
3253          ('', '""example" example"@example.com'))
3254        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3255          ('', '"\\"example\\" example"@example.com'))
3256        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3257          ('', '"\\\\"example\\\\" example"@example.com'))
3258
3259    def test_parseaddr_preserves_spaces_in_local_part(self):
3260        # issue 9286.  A normal RFC5322 local part should not contain any
3261        # folding white space, but legacy local parts can (they are a sequence
3262        # of atoms, not dotatoms).  On the other hand we strip whitespace from
3263        # before the @ and around dots, on the assumption that the whitespace
3264        # around the punctuation is a mistake in what would otherwise be
3265        # an RFC5322 local part.  Leading whitespace is, usual, stripped as well.
3266        self.assertEqual(('', "merwok wok@xample.com"),
3267            utils.parseaddr("merwok wok@xample.com"))
3268        self.assertEqual(('', "merwok  wok@xample.com"),
3269            utils.parseaddr("merwok  wok@xample.com"))
3270        self.assertEqual(('', "merwok  wok@xample.com"),
3271            utils.parseaddr(" merwok  wok  @xample.com"))
3272        self.assertEqual(('', 'merwok"wok"  wok@xample.com'),
3273            utils.parseaddr('merwok"wok"  wok@xample.com'))
3274        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3275            utils.parseaddr('merwok. wok .  wok@xample.com'))
3276
3277    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3278        addr = ("'foo@example.com' (foo@example.com)",
3279                'foo@example.com')
3280        addrstr = ('"\'foo@example.com\' '
3281                            '(foo@example.com)" <foo@example.com>')
3282        self.assertEqual(utils.parseaddr(addrstr), addr)
3283        self.assertEqual(utils.formataddr(addr), addrstr)
3284
3285
3286    def test_multiline_from_comment(self):
3287        x = """\
3288Foo
3289\tBar <foo@example.com>"""
3290        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3291
3292    def test_quote_dump(self):
3293        self.assertEqual(
3294            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3295            r'"A Silly; Person" <person@dom.ain>')
3296
3297    def test_charset_richcomparisons(self):
3298        eq = self.assertEqual
3299        ne = self.assertNotEqual
3300        cset1 = Charset()
3301        cset2 = Charset()
3302        eq(cset1, 'us-ascii')
3303        eq(cset1, 'US-ASCII')
3304        eq(cset1, 'Us-AsCiI')
3305        eq('us-ascii', cset1)
3306        eq('US-ASCII', cset1)
3307        eq('Us-AsCiI', cset1)
3308        ne(cset1, 'usascii')
3309        ne(cset1, 'USASCII')
3310        ne(cset1, 'UsAsCiI')
3311        ne('usascii', cset1)
3312        ne('USASCII', cset1)
3313        ne('UsAsCiI', cset1)
3314        eq(cset1, cset2)
3315        eq(cset2, cset1)
3316
3317    def test_getaddresses(self):
3318        eq = self.assertEqual
3319        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3320                               'Bud Person <bperson@dom.ain>']),
3321           [('Al Person', 'aperson@dom.ain'),
3322            ('Bud Person', 'bperson@dom.ain')])
3323
3324    def test_getaddresses_comma_in_name(self):
3325        """GH-106669 regression test."""
3326        self.assertEqual(
3327            utils.getaddresses(
3328                [
3329                    '"Bud, Person" <bperson@dom.ain>',
3330                    'aperson@dom.ain (Al Person)',
3331                    '"Mariusz Felisiak" <to@example.com>',
3332                ]
3333            ),
3334            [
3335                ('Bud, Person', 'bperson@dom.ain'),
3336                ('Al Person', 'aperson@dom.ain'),
3337                ('Mariusz Felisiak', 'to@example.com'),
3338            ],
3339        )
3340
3341    def test_parsing_errors(self):
3342        """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
3343        alice = 'alice@example.org'
3344        bob = 'bob@example.com'
3345        empty = ('', '')
3346
3347        # Test utils.getaddresses() and utils.parseaddr() on malformed email
3348        # addresses: default behavior (strict=True) rejects malformed address,
3349        # and strict=False which tolerates malformed address.
3350        for invalid_separator, expected_non_strict in (
3351            ('(', [(f'<{bob}>', alice)]),
3352            (')', [('', alice), empty, ('', bob)]),
3353            ('<', [('', alice), empty, ('', bob), empty]),
3354            ('>', [('', alice), empty, ('', bob)]),
3355            ('[', [('', f'{alice}[<{bob}>]')]),
3356            (']', [('', alice), empty, ('', bob)]),
3357            ('@', [empty, empty, ('', bob)]),
3358            (';', [('', alice), empty, ('', bob)]),
3359            (':', [('', alice), ('', bob)]),
3360            ('.', [('', alice + '.'), ('', bob)]),
3361            ('"', [('', alice), ('', f'<{bob}>')]),
3362        ):
3363            address = f'{alice}{invalid_separator}<{bob}>'
3364            with self.subTest(address=address):
3365                self.assertEqual(utils.getaddresses([address]),
3366                                 [empty])
3367                self.assertEqual(utils.getaddresses([address], strict=False),
3368                                 expected_non_strict)
3369
3370                self.assertEqual(utils.parseaddr([address]),
3371                                 empty)
3372                self.assertEqual(utils.parseaddr([address], strict=False),
3373                                 ('', address))
3374
3375        # Comma (',') is treated differently depending on strict parameter.
3376        # Comma without quotes.
3377        address = f'{alice},<{bob}>'
3378        self.assertEqual(utils.getaddresses([address]),
3379                         [('', alice), ('', bob)])
3380        self.assertEqual(utils.getaddresses([address], strict=False),
3381                         [('', alice), ('', bob)])
3382        self.assertEqual(utils.parseaddr([address]),
3383                         empty)
3384        self.assertEqual(utils.parseaddr([address], strict=False),
3385                         ('', address))
3386
3387        # Real name between quotes containing comma.
3388        address = '"Alice, alice@example.org" <bob@example.com>'
3389        expected_strict = ('Alice, alice@example.org', 'bob@example.com')
3390        self.assertEqual(utils.getaddresses([address]), [expected_strict])
3391        self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
3392        self.assertEqual(utils.parseaddr([address]), expected_strict)
3393        self.assertEqual(utils.parseaddr([address], strict=False),
3394                         ('', address))
3395
3396        # Valid parenthesis in comments.
3397        address = 'alice@example.org (Alice)'
3398        expected_strict = ('Alice', 'alice@example.org')
3399        self.assertEqual(utils.getaddresses([address]), [expected_strict])
3400        self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
3401        self.assertEqual(utils.parseaddr([address]), expected_strict)
3402        self.assertEqual(utils.parseaddr([address], strict=False),
3403                         ('', address))
3404
3405        # Invalid parenthesis in comments.
3406        address = 'alice@example.org )Alice('
3407        self.assertEqual(utils.getaddresses([address]), [empty])
3408        self.assertEqual(utils.getaddresses([address], strict=False),
3409                         [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
3410        self.assertEqual(utils.parseaddr([address]), empty)
3411        self.assertEqual(utils.parseaddr([address], strict=False),
3412                         ('', address))
3413
3414        # Two addresses with quotes separated by comma.
3415        address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
3416        self.assertEqual(utils.getaddresses([address]),
3417                         [('Jane Doe', 'jane@example.net'),
3418                          ('John Doe', 'john@example.net')])
3419        self.assertEqual(utils.getaddresses([address], strict=False),
3420                         [('Jane Doe', 'jane@example.net'),
3421                          ('John Doe', 'john@example.net')])
3422        self.assertEqual(utils.parseaddr([address]), empty)
3423        self.assertEqual(utils.parseaddr([address], strict=False),
3424                         ('', address))
3425
3426        # Test email.utils.supports_strict_parsing attribute
3427        self.assertEqual(email.utils.supports_strict_parsing, True)
3428
3429    def test_getaddresses_nasty(self):
3430        for addresses, expected in (
3431            (['"Sürname, Firstname" <to@example.com>'],
3432             [('Sürname, Firstname', 'to@example.com')]),
3433
3434            (['foo: ;'],
3435             [('', '')]),
3436
3437            (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
3438             [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
3439
3440            ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
3441             [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
3442
3443            (['(Empty list)(start)Undisclosed recipients  :(nobody(I know))'],
3444             [('', '')]),
3445
3446            (['Mary <@machine.tld:mary@example.net>, , jdoe@test   . example'],
3447             [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
3448
3449            (['John Doe <jdoe@machine(comment).  example>'],
3450             [('John Doe (comment)', 'jdoe@machine.example')]),
3451
3452            (['"Mary Smith: Personal Account" <smith@home.example>'],
3453             [('Mary Smith: Personal Account', 'smith@home.example')]),
3454
3455            (['Undisclosed recipients:;'],
3456             [('', '')]),
3457
3458            ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
3459             [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
3460        ):
3461            with self.subTest(addresses=addresses):
3462                self.assertEqual(utils.getaddresses(addresses),
3463                                 expected)
3464                self.assertEqual(utils.getaddresses(addresses, strict=False),
3465                                 expected)
3466
3467        addresses = ['[]*-- =~$']
3468        self.assertEqual(utils.getaddresses(addresses),
3469                         [('', '')])
3470        self.assertEqual(utils.getaddresses(addresses, strict=False),
3471                         [('', ''), ('', ''), ('', '*--')])
3472
3473    def test_getaddresses_embedded_comment(self):
3474        """Test proper handling of a nested comment"""
3475        eq = self.assertEqual
3476        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3477        eq(addrs[0][1], 'foo@bar.com')
3478
3479    def test_getaddresses_header_obj(self):
3480        """Test the handling of a Header object."""
3481        addrs = utils.getaddresses([Header('Al Person <aperson@dom.ain>')])
3482        self.assertEqual(addrs[0][1], 'aperson@dom.ain')
3483
3484    @threading_helper.requires_working_threading()
3485    def test_make_msgid_collisions(self):
3486        # Test make_msgid uniqueness, even with multiple threads
3487        class MsgidsThread(Thread):
3488            def run(self):
3489                # generate msgids for 3 seconds
3490                self.msgids = []
3491                append = self.msgids.append
3492                make_msgid = utils.make_msgid
3493                clock = time.monotonic
3494                tfin = clock() + 3.0
3495                while clock() < tfin:
3496                    append(make_msgid(domain='testdomain-string'))
3497
3498        threads = [MsgidsThread() for i in range(5)]
3499        with threading_helper.start_threads(threads):
3500            pass
3501        all_ids = sum([t.msgids for t in threads], [])
3502        self.assertEqual(len(set(all_ids)), len(all_ids))
3503
3504    def test_utils_quote_unquote(self):
3505        eq = self.assertEqual
3506        msg = Message()
3507        msg.add_header('content-disposition', 'attachment',
3508                       filename='foo\\wacky"name')
3509        eq(msg.get_filename(), 'foo\\wacky"name')
3510
3511    def test_get_body_encoding_with_bogus_charset(self):
3512        charset = Charset('not a charset')
3513        self.assertEqual(charset.get_body_encoding(), 'base64')
3514
3515    def test_get_body_encoding_with_uppercase_charset(self):
3516        eq = self.assertEqual
3517        msg = Message()
3518        msg['Content-Type'] = 'text/plain; charset=UTF-8'
3519        eq(msg['content-type'], 'text/plain; charset=UTF-8')
3520        charsets = msg.get_charsets()
3521        eq(len(charsets), 1)
3522        eq(charsets[0], 'utf-8')
3523        charset = Charset(charsets[0])
3524        eq(charset.get_body_encoding(), 'base64')
3525        msg.set_payload(b'hello world', charset=charset)
3526        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3527        eq(msg.get_payload(decode=True), b'hello world')
3528        eq(msg['content-transfer-encoding'], 'base64')
3529        # Try another one
3530        msg = Message()
3531        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3532        charsets = msg.get_charsets()
3533        eq(len(charsets), 1)
3534        eq(charsets[0], 'us-ascii')
3535        charset = Charset(charsets[0])
3536        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3537        msg.set_payload('hello world', charset=charset)
3538        eq(msg.get_payload(), 'hello world')
3539        eq(msg['content-transfer-encoding'], '7bit')
3540
3541    def test_charsets_case_insensitive(self):
3542        lc = Charset('us-ascii')
3543        uc = Charset('US-ASCII')
3544        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3545
3546    def test_partial_falls_inside_message_delivery_status(self):
3547        eq = self.ndiffAssertEqual
3548        # The Parser interface provides chunks of data to FeedParser in 8192
3549        # byte gulps.  SF bug #1076485 found one of those chunks inside
3550        # message/delivery-status header block, which triggered an
3551        # unreadline() of NeedMoreData.
3552        msg = self._msgobj('msg_43.txt')
3553        sfp = StringIO()
3554        iterators._structure(msg, sfp)
3555        eq(sfp.getvalue(), """\
3556multipart/report
3557    text/plain
3558    message/delivery-status
3559        text/plain
3560        text/plain
3561        text/plain
3562        text/plain
3563        text/plain
3564        text/plain
3565        text/plain
3566        text/plain
3567        text/plain
3568        text/plain
3569        text/plain
3570        text/plain
3571        text/plain
3572        text/plain
3573        text/plain
3574        text/plain
3575        text/plain
3576        text/plain
3577        text/plain
3578        text/plain
3579        text/plain
3580        text/plain
3581        text/plain
3582        text/plain
3583        text/plain
3584        text/plain
3585    text/rfc822-headers
3586""")
3587
3588    def test_make_msgid_domain(self):
3589        self.assertEqual(
3590            email.utils.make_msgid(domain='testdomain-string')[-19:],
3591            '@testdomain-string>')
3592
3593    def test_make_msgid_idstring(self):
3594        self.assertEqual(
3595            email.utils.make_msgid(idstring='test-idstring',
3596                domain='testdomain-string')[-33:],
3597            '.test-idstring@testdomain-string>')
3598
3599    def test_make_msgid_default_domain(self):
3600        with patch('socket.getfqdn') as mock_getfqdn:
3601            mock_getfqdn.return_value = domain = 'pythontest.example.com'
3602            self.assertTrue(
3603                email.utils.make_msgid().endswith(
3604                    '@' + domain + '>'))
3605
3606    def test_Generator_linend(self):
3607        # Issue 14645.
3608        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f:
3609            msgtxt = f.read()
3610        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3611        msg = email.message_from_string(msgtxt)
3612        s = StringIO()
3613        g = email.generator.Generator(s)
3614        g.flatten(msg)
3615        self.assertEqual(s.getvalue(), msgtxt_nl)
3616
3617    def test_BytesGenerator_linend(self):
3618        # Issue 14645.
3619        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f:
3620            msgtxt = f.read()
3621        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3622        msg = email.message_from_string(msgtxt_nl)
3623        s = BytesIO()
3624        g = email.generator.BytesGenerator(s)
3625        g.flatten(msg, linesep='\r\n')
3626        self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3627
3628    def test_BytesGenerator_linend_with_non_ascii(self):
3629        # Issue 14645.
3630        with openfile('msg_26.txt', 'rb') as f:
3631            msgtxt = f.read()
3632        msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3633        msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3634        msg = email.message_from_bytes(msgtxt_nl)
3635        s = BytesIO()
3636        g = email.generator.BytesGenerator(s)
3637        g.flatten(msg, linesep='\r\n')
3638        self.assertEqual(s.getvalue(), msgtxt)
3639
3640    def test_mime_classes_policy_argument(self):
3641        with openfile('sndhdr.au', 'rb') as fp:
3642            audiodata = fp.read()
3643        with openfile('python.gif', 'rb') as fp:
3644            bindata = fp.read()
3645        classes = [
3646            (MIMEApplication, ('',)),
3647            (MIMEAudio, (audiodata,)),
3648            (MIMEImage, (bindata,)),
3649            (MIMEMessage, (Message(),)),
3650            (MIMENonMultipart, ('multipart', 'mixed')),
3651            (MIMEText, ('',)),
3652        ]
3653        for cls, constructor in classes:
3654            with self.subTest(cls=cls.__name__, policy='compat32'):
3655                m = cls(*constructor)
3656                self.assertIs(m.policy, email.policy.compat32)
3657            with self.subTest(cls=cls.__name__, policy='default'):
3658                m = cls(*constructor, policy=email.policy.default)
3659                self.assertIs(m.policy, email.policy.default)
3660
3661    def test_iter_escaped_chars(self):
3662        self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
3663                         [(0, 'a'),
3664                          (2, '\\\\'),
3665                          (3, 'b'),
3666                          (5, '\\"'),
3667                          (6, 'c'),
3668                          (8, '\\\\'),
3669                          (9, '"'),
3670                          (10, 'd')])
3671        self.assertEqual(list(utils._iter_escaped_chars('a\\')),
3672                         [(0, 'a'), (1, '\\')])
3673
3674    def test_strip_quoted_realnames(self):
3675        def check(addr, expected):
3676            self.assertEqual(utils._strip_quoted_realnames(addr), expected)
3677
3678        check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
3679              ' <jane@example.net>,  <john@example.net>')
3680        check(r'"Jane \"Doe\"." <jane@example.net>',
3681              ' <jane@example.net>')
3682
3683        # special cases
3684        check(r'before"name"after', 'beforeafter')
3685        check(r'before"name"', 'before')
3686        check(r'b"name"', 'b')  # single char
3687        check(r'"name"after', 'after')
3688        check(r'"name"a', 'a')  # single char
3689        check(r'"name"', '')
3690
3691        # no change
3692        for addr in (
3693            'Jane Doe <jane@example.net>, John Doe <john@example.net>',
3694            'lone " quote',
3695        ):
3696            self.assertEqual(utils._strip_quoted_realnames(addr), addr)
3697
3698
3699    def test_check_parenthesis(self):
3700        addr = 'alice@example.net'
3701        self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)'))
3702        self.assertFalse(utils._check_parenthesis(f'{addr} )Alice('))
3703        self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))'))
3704        self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)'))
3705
3706        # Ignore real name between quotes
3707        self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}'))
3708
3709# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and for FeedParser's BufferedSubFile."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields the expected body lines."""
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        simple_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(simple_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(simple_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        multipart = self._msgobj('msg_02.txt')
        multipart_lines = list(iterators.body_line_iterator(multipart))
        self.assertEqual(len(multipart_lines), 43)
        with openfile('msg_19.txt', encoding="utf-8") as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(multipart_lines),
                                  fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') finds two parts in msg_04."""
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(
            EMPTYSTRING.join(payloads),
            'a simple kind of mirror\n'
            'to reflect upon our own\n'
            'a simple kind of mirror\n'
            'to reflect upon our own\n')

    def test_typed_subpart_iterator_default_type(self):
        """typed_subpart_iterator(msg, 'text', 'plain') finds one part."""
        msg = self._msgobj('msg_03.txt')
        matches = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        payloads = [part.get_payload() for part in matches]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(
            EMPTYSTRING.join(payloads),
            '\n'
            'Hi,\n'
            '\n'
            'Do you like this message?\n'
            '\n'
            '-Me\n')

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry: (text pushed, number of lines readable afterwards).
        pushes = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        buffered = BufferedSubFile()
        emitted = []
        expected_total = 0
        for text, expected_count in pushes:
            buffered.push(text)
            expected_total += expected_count
            # Drain every line that is available right now.
            ready = list(iter(buffered.readline, NeedMoreData))
            emitted.extend(ready)
            self.assertEqual(expected_count, len(ready))
        self.assertEqual(len(emitted), expected_total)
        # Nothing may be lost or invented: output equals input.
        self.assertEqual(''.join([text for text, _ in pushes]),
                         ''.join(emitted))

    def test_push_random(self):
        """Random text pushed in 5-char chunks splits like str.splitlines()."""
        from email.feedparser import BufferedSubFile, NeedMoreData

        length = 10000
        step = 5
        alphabet = 'abcd \t\r\n'

        text = ''.join(choice(alphabet) for _ in range(length)) + '\n'
        expected_lines = text.splitlines(True)

        buffered = BufferedSubFile()
        collected = []
        for start in range(0, len(text), step):
            buffered.push(text[start:start + step])
            collected.extend(iter(buffered.readline, NeedMoreData))
        self.assertEqual(collected, expected_lines)
3811
3812
class TestFeedParsers(TestEmailBase):

    def parse(self, chunks):
        # Feed each item of *chunks*, in order, to a fresh FeedParser and
        # return the message produced by close().  A bare str is iterated
        # character by character, so it is fed one character at a time.
        parser = FeedParser()
        for piece in chunks:
            parser.feed(piece)
        return parser.close()

    def test_empty_header_name_handled(self):
        # Issue 19996
        parsed = self.parse("First: val\n: bad\nSecond: val")
        for name in ('First', 'Second'):
            self.assertEqual(parsed[name], 'val')

    def test_newlines(self):
        # (chunks fed to the parser, header names expected afterwards)
        key_cases = [
            (['a:\nb:\rc:\r\nd:\n'], ['a', 'b', 'c', 'd']),
            (['a:\nb:\rc:\r\nd:'], ['a', 'b', 'c', 'd']),
            (['a:\rb', 'c:\n'], ['a', 'bc']),
            (['a:\r', 'b:\n'], ['a', 'b']),
            (['a:\r', '\nb:\n'], ['a', 'b']),
        ]
        for chunks, expected_keys in key_cases:
            self.assertEqual(self.parse(chunks).keys(), expected_keys)

        # Only CR and LF should break header fields
        item_cases = [
            (['a:\x85b:\u2028c:\n'], [('a', '\x85b:\u2028c:')]),
            (['a:\r', 'b:\x85', 'c:\n'], [('a', ''), ('b', '\x85c:')]),
        ]
        for chunks, expected_items in item_cases:
            self.assertEqual(self.parse(chunks).items(), expected_items)

    def test_long_lines(self):
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        M, N = 1000, 20000
        line = 'x' * M

        # The body arrives as N chunks without any line ending; both a
        # LF LF and a CR CR header terminator must be recognised.
        for header in ('a:b\n\n', 'a:b\r\r'):
            msg = self.parse([header] + [line] * N)
            self.assertEqual(msg.items(), [('a', 'b')])
            self.assertEqual(msg.get_payload(), line * N)

        # Chunks ending in \x85 must be kept verbatim in the payload.
        msg = self.parse(['a:b\r\r'] + [line + '\x85'] * N)
        self.assertEqual(msg.items(), [('a', 'b')])
        self.assertEqual(msg.get_payload(), (line + '\x85') * N)

        # A header value that is itself spread over N chunks.
        msg = self.parse(['a:\r', 'b: '] + [line] * N)
        self.assertEqual(msg.items(), [('a', ''), ('b', line * N)])
3859
3860
class TestParsers(TestEmailBase):

    # Never truncate the diffs shown when an assertion in this class fails.
    maxDiff = None

    def test_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', encoding="utf-8") as fp:
            parsed = HeaderParser().parse(fp)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        # Despite the multipart content type, the payload is one str.
        self.assertFalse(parsed.is_multipart())
        self.assertIsInstance(parsed.get_payload(), str)

    def test_bytes_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            parsed = email.parser.BytesHeaderParser().parse(fp)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        # Despite the multipart content type, the payload is one str, and
        # bytes when decoding is requested.
        self.assertFalse(parsed.is_multipart())
        self.assertIsInstance(parsed.get_payload(), str)
        self.assertIsInstance(parsed.get_payload(decode=True), bytes)

    def test_bytes_parser_does_not_close_file(self):
        # The caller's binary file object must still be open after parsing.
        bytes_parser = email.parser.BytesParser()
        with openfile('msg_02.txt', 'rb') as binary_file:
            bytes_parser.parse(binary_file)
            self.assertFalse(binary_file.closed)

    def test_bytes_parser_on_exception_does_not_close_file(self):
        # Under the strict policy msg_15.txt raises a defect; the file must
        # still be open afterwards.
        strict_parser = email.parser.BytesParser(policy=email.policy.strict)
        with openfile('msg_15.txt', 'rb') as binary_file:
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(binary_file)
            self.assertFalse(binary_file.closed)

    def test_parser_does_not_close_file(self):
        # The caller's text file object must still be open after parsing.
        text_parser = email.parser.Parser()
        with openfile('msg_02.txt', encoding="utf-8") as text_file:
            text_parser.parse(text_file)
            self.assertFalse(text_file.closed)

    def test_parser_on_exception_does_not_close_file(self):
        # Under the strict policy msg_15.txt raises a defect; the file must
        # still be open afterwards.
        strict_parser = email.parser.Parser(policy=email.policy.strict)
        with openfile('msg_15.txt', encoding="utf-8") as text_file:
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(text_file)
            self.assertFalse(text_file.closed)

    def test_whitespace_continuation(self):
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        # Like the previous test, but the subject line is the last
        # header.
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        # newline='\n' stops the file layer from translating the CRLF line
        # endings in msg_26.txt, so the parser sees them as written.
        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp:
            parsed = Parser().parse(fp)
        parts = parsed.get_payload()
        self.assertEqual(len(parts), 2)
        text_part, attachment = parts
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp:
            original = fp.read()
        parsed = email.message_from_string(original)
        out = StringIO()
        Generator(out).flatten(parsed, linesep='\r\n')
        # Parsing and flattening with CRLF must reproduce the input.
        self.assertEqual(out.getvalue(), original)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt', encoding="utf-8") as fp:
            digest = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(digest.is_multipart(), 1)
        eq(len(digest.get_payload()), 2)
        # Both message/rfc822 parts wrap one text/plain message and are
        # checked the same way, in order.
        for index, expected_body in enumerate(['message 1\n', 'message 2\n']):
            wrapper = digest.get_payload(index)
            eq(wrapper.get_content_type(), 'message/rfc822')
            eq(wrapper.is_multipart(), 1)
            eq(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            eq(inner.is_multipart(), 0)
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), expected_body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        header_lines = [
            'From: Andrew Person <aperson@dom.ain',
            'Subject: Test',
            'Date: Tue, 20 Aug 2002 16:43:45 +1000',
        ]
        parsed = email.message_from_string('\n'.join(header_lines))
        self.assertEqual(parsed['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        first_value = 'text'
        second_value = 'more text'
        source = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            first_value, second_value)
        parsed = email.message_from_string(source)
        self.assertEqual(parsed.get('Header'), first_value)
        self.assertEqual(parsed.get('Next-Header'), second_value)

    def test_rfc2822_header_syntax(self):
        # Unusual printable characters are accepted in header field names.
        source = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        parsed = email.message_from_string(source)
        self.assertEqual(len(parsed), 3)
        self.assertEqual(sorted(parsed), ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(parsed.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line has a space before any colon, so it is not a
        # header, and no headers at all are recorded.
        source = ('>From foo@example.com 11:25:53\n'
                  'From: bar\n!"#QUX;~: zoo\n\nbody')
        parsed = email.message_from_string(source)
        self.assertEqual(len(parsed.keys()), 0)

    def test_rfc2822_one_character_header(self):
        # Single-character header names are valid.
        source = 'A: first header\nB: second header\nCC: third header\n\nbody'
        parsed = email.message_from_string(source)
        self.assertEqual(sorted(parsed.keys()), ['A', 'B', 'CC'])
        self.assertEqual(parsed.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        source = ''.join([
            "From: foo@bar.com\n",
            "To: baz\n",
            "Mime-Version: 1.0\n",
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n",
            "\n",
            "--BOUNDARY\n",
            "Content-Type: text/plain\n",
            "\n",
            "body ending with CRLF newline\r\n",
            "\n",
            "--BOUNDARY--\n",
        ])
        parsed = email.message_from_string(source)
        first_part = parsed.get_payload(0)
        self.assertTrue(first_part.get_payload().endswith('\r\n'))
4061
4062
4063class Test8BitBytesHandling(TestEmailBase):
4064    # In Python3 all input is string, but that doesn't work if the actual input
4065    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
4066    # decode byte streams using the surrogateescape error handler, and
4067    # reconvert to binary at appropriate places if we detect surrogates.  This
4068    # doesn't allow us to transform headers with 8bit bytes (they get munged),
4069    # but it does allow us to parse and preserve them, and to decode body
4070    # parts that use an 8bit CTE.
4071
4072    bodytest_msg = textwrap.dedent("""\
4073        From: foo@bar.com
4074        To: baz
4075        Mime-Version: 1.0
4076        Content-Type: text/plain; charset={charset}
4077        Content-Transfer-Encoding: {cte}
4078
4079        {bodyline}
4080        """)
4081
4082    def test_known_8bit_CTE(self):
4083        m = self.bodytest_msg.format(charset='utf-8',
4084                                     cte='8bit',
4085                                     bodyline='pöstal').encode('utf-8')
4086        msg = email.message_from_bytes(m)
4087        self.assertEqual(msg.get_payload(), "pöstal\n")
4088        self.assertEqual(msg.get_payload(decode=True),
4089                         "pöstal\n".encode('utf-8'))
4090
4091    def test_unknown_8bit_CTE(self):
4092        m = self.bodytest_msg.format(charset='notavalidcharset',
4093                                     cte='8bit',
4094                                     bodyline='pöstal').encode('utf-8')
4095        msg = email.message_from_bytes(m)
4096        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
4097        self.assertEqual(msg.get_payload(decode=True),
4098                         "pöstal\n".encode('utf-8'))
4099
4100    def test_8bit_in_quopri_body(self):
4101        # This is non-RFC compliant data...without 'decode' the library code
4102        # decodes the body using the charset from the headers, and because the
4103        # source byte really is utf-8 this works.  This is likely to fail
4104        # against real dirty data (ie: produce mojibake), but the data is
4105        # invalid anyway so it is as good a guess as any.  But this means that
4106        # this test just confirms the current behavior; that behavior is not
4107        # necessarily the best possible behavior.  With 'decode' it is
4108        # returning the raw bytes, so that test should be of correct behavior,
4109        # or at least produce the same result that email4 did.
4110        m = self.bodytest_msg.format(charset='utf-8',
4111                                     cte='quoted-printable',
4112                                     bodyline='p=C3=B6stál').encode('utf-8')
4113        msg = email.message_from_bytes(m)
4114        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
4115        self.assertEqual(msg.get_payload(decode=True),
4116                         'pöstál\n'.encode('utf-8'))
4117
4118    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
4119        # This is similar to the previous test, but proves that if the 8bit
4120        # byte is undecodeable in the specified charset, it gets replaced
4121        # by the unicode 'unknown' character.  Again, this may or may not
4122        # be the ideal behavior.  Note that if decode=False none of the
4123        # decoders will get involved, so this is the only test we need
4124        # for this behavior.
4125        m = self.bodytest_msg.format(charset='ascii',
4126                                     cte='quoted-printable',
4127                                     bodyline='p=C3=B6stál').encode('utf-8')
4128        msg = email.message_from_bytes(m)
4129        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
4130        self.assertEqual(msg.get_payload(decode=True),
4131                        'pöstál\n'.encode('utf-8'))
4132
4133    # test_defect_handling:test_invalid_chars_in_base64_payload
4134    def test_8bit_in_base64_body(self):
4135        # If we get 8bit bytes in a base64 body, we can just ignore them
4136        # as being outside the base64 alphabet and decode anyway.  But
4137        # we register a defect.
4138        m = self.bodytest_msg.format(charset='utf-8',
4139                                     cte='base64',
4140                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
4141        msg = email.message_from_bytes(m)
4142        self.assertEqual(msg.get_payload(decode=True),
4143                         'pöstal'.encode('utf-8'))
4144        self.assertIsInstance(msg.defects[0],
4145                              errors.InvalidBase64CharactersDefect)
4146
4147    def test_8bit_in_uuencode_body(self):
4148        # Sticking an 8bit byte in a uuencode block makes it undecodable by
4149        # normal means, so the block is returned undecoded, but as bytes.
4150        m = self.bodytest_msg.format(charset='utf-8',
4151                                     cte='uuencode',
4152                                     bodyline='<,.V<W1A; á ').encode('utf-8')
4153        msg = email.message_from_bytes(m)
4154        self.assertEqual(msg.get_payload(decode=True),
4155                         '<,.V<W1A; á \n'.encode('utf-8'))
4156
4157
4158    headertest_headers = (
4159        ('From: foo@bar.com', ('From', 'foo@bar.com')),
4160        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
4161        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
4162            '\tJean de Baddie',
4163            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
4164                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
4165                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
4166        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
4167        )
4168    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
4169        '\nYes, they are flying.\n').encode('utf-8')
4170
4171    def test_get_8bit_header(self):
4172        msg = email.message_from_bytes(self.headertest_msg)
4173        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
4174        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
4175
4176    def test_print_8bit_headers(self):
4177        msg = email.message_from_bytes(self.headertest_msg)
4178        self.assertEqual(str(msg),
4179                         textwrap.dedent("""\
4180                            From: {}
4181                            To: {}
4182                            Subject: {}
4183                            From: {}
4184
4185                            Yes, they are flying.
4186                            """).format(*[expected[1] for (_, expected) in
4187                                        self.headertest_headers]))
4188
4189    def test_values_with_8bit_headers(self):
4190        msg = email.message_from_bytes(self.headertest_msg)
4191        self.assertListEqual([str(x) for x in msg.values()],
4192                              ['foo@bar.com',
4193                               'b\uFFFD\uFFFDz',
4194                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
4195                                   'coll\uFFFD\uFFFDgue, le pouf '
4196                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
4197                                   '\tJean de Baddie',
4198                               "g\uFFFD\uFFFDst"])
4199
4200    def test_items_with_8bit_headers(self):
4201        msg = email.message_from_bytes(self.headertest_msg)
4202        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
4203                              [('From', 'foo@bar.com'),
4204                               ('To', 'b\uFFFD\uFFFDz'),
4205                               ('Subject', 'Maintenant je vous '
4206                                  'pr\uFFFD\uFFFDsente '
4207                                  'mon coll\uFFFD\uFFFDgue, le pouf '
4208                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
4209                                  '\tJean de Baddie'),
4210                               ('From', 'g\uFFFD\uFFFDst')])
4211
4212    def test_get_all_with_8bit_headers(self):
4213        msg = email.message_from_bytes(self.headertest_msg)
4214        self.assertListEqual([str(x) for x in msg.get_all('from')],
4215                              ['foo@bar.com',
4216                               'g\uFFFD\uFFFDst'])
4217
4218    def test_get_content_type_with_8bit(self):
4219        msg = email.message_from_bytes(textwrap.dedent("""\
4220            Content-Type: text/pl\xA7in; charset=utf-8
4221            """).encode('latin-1'))
4222        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
4223        self.assertEqual(msg.get_content_maintype(), "text")
4224        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
4225
4226    # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
4227    def test_get_params_with_8bit(self):
4228        msg = email.message_from_bytes(
4229            'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
4230        self.assertEqual(msg.get_params(header='x-header'),
4231           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
4232        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
4233        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
4234        self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
4235
4236    # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
4237    def test_get_rfc2231_params_with_8bit(self):
4238        msg = email.message_from_bytes(textwrap.dedent("""\
4239            Content-Type: text/plain; charset=us-ascii;
4240             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
4241             ).encode('latin-1'))
4242        self.assertEqual(msg.get_param('title'),
4243            ('us-ascii', 'en', 'This is not f\uFFFDn'))
4244
4245    def test_set_rfc2231_params_with_8bit(self):
4246        msg = email.message_from_bytes(textwrap.dedent("""\
4247            Content-Type: text/plain; charset=us-ascii;
4248             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
4249             ).encode('latin-1'))
4250        msg.set_param('title', 'test')
4251        self.assertEqual(msg.get_param('title'), 'test')
4252
4253    def test_del_rfc2231_params_with_8bit(self):
4254        msg = email.message_from_bytes(textwrap.dedent("""\
4255            Content-Type: text/plain; charset=us-ascii;
4256             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
4257             ).encode('latin-1'))
4258        msg.del_param('title')
4259        self.assertEqual(msg.get_param('title'), None)
4260        self.assertEqual(msg.get_content_maintype(), 'text')
4261
4262    def test_get_payload_with_8bit_cte_header(self):
4263        msg = email.message_from_bytes(textwrap.dedent("""\
4264            Content-Transfer-Encoding: b\xa7se64
4265            Content-Type: text/plain; charset=latin-1
4266
4267            payload
4268            """).encode('latin-1'))
4269        self.assertEqual(msg.get_payload(), 'payload\n')
4270        self.assertEqual(msg.get_payload(decode=True), b'payload\n')
4271
4272    non_latin_bin_msg = textwrap.dedent("""\
4273        From: foo@bar.com
4274        To: báz
4275        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
4276        \tJean de Baddie
4277        Mime-Version: 1.0
4278        Content-Type: text/plain; charset="utf-8"
4279        Content-Transfer-Encoding: 8bit
4280
4281        Да, они летят.
4282        """).encode('utf-8')
4283
4284    def test_bytes_generator(self):
4285        msg = email.message_from_bytes(self.non_latin_bin_msg)
4286        out = BytesIO()
4287        email.generator.BytesGenerator(out).flatten(msg)
4288        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
4289
4290    def test_bytes_generator_handles_None_body(self):
4291        #Issue 11019
4292        msg = email.message.Message()
4293        out = BytesIO()
4294        email.generator.BytesGenerator(out).flatten(msg)
4295        self.assertEqual(out.getvalue(), b"\n")
4296
4297    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
4298        From: foo@bar.com
4299        To: =?unknown-8bit?q?b=C3=A1z?=
4300        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
4301         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
4302         =?unknown-8bit?q?_Jean_de_Baddie?=
4303        Mime-Version: 1.0
4304        Content-Type: text/plain; charset="utf-8"
4305        Content-Transfer-Encoding: base64
4306
4307        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
4308        """)
4309
4310    def test_generator_handles_8bit(self):
4311        msg = email.message_from_bytes(self.non_latin_bin_msg)
4312        out = StringIO()
4313        email.generator.Generator(out).flatten(msg)
4314        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
4315
4316    def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
4317        msg = email.message_from_bytes(self.non_latin_bin_msg)
4318        out = BytesIO()
4319        BytesGenerator(out).flatten(msg)
4320        orig_value = out.getvalue()
4321        Generator(StringIO()).flatten(msg) # Should not mutate msg!
4322        out = BytesIO()
4323        BytesGenerator(out).flatten(msg)
4324        self.assertEqual(out.getvalue(), orig_value)
4325
4326    def test_bytes_generator_with_unix_from(self):
4327        # The unixfrom contains a current date, so we can't check it
4328        # literally.  Just make sure the first word is 'From' and the
4329        # rest of the message matches the input.
4330        msg = email.message_from_bytes(self.non_latin_bin_msg)
4331        out = BytesIO()
4332        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
4333        lines = out.getvalue().split(b'\n')
4334        self.assertEqual(lines[0].split()[0], b'From')
4335        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
4336
4337    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
4338    non_latin_bin_msg_as7bit[2:4] = [
4339        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
4340         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
4341    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
4342
4343    def test_message_from_binary_file(self):
4344        fn = 'test.msg'
4345        self.addCleanup(unlink, fn)
4346        with open(fn, 'wb') as testfile:
4347            testfile.write(self.non_latin_bin_msg)
4348        with open(fn, 'rb') as testfile:
4349            m = email.parser.BytesParser().parse(testfile)
4350        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
4351
4352    latin_bin_msg = textwrap.dedent("""\
4353        From: foo@bar.com
4354        To: Dinsdale
4355        Subject: Nudge nudge, wink, wink
4356        Mime-Version: 1.0
4357        Content-Type: text/plain; charset="latin-1"
4358        Content-Transfer-Encoding: 8bit
4359
4360        oh là là, know what I mean, know what I mean?
4361        """).encode('latin-1')
4362
4363    latin_bin_msg_as7bit = textwrap.dedent("""\
4364        From: foo@bar.com
4365        To: Dinsdale
4366        Subject: Nudge nudge, wink, wink
4367        Mime-Version: 1.0
4368        Content-Type: text/plain; charset="iso-8859-1"
4369        Content-Transfer-Encoding: quoted-printable
4370
4371        oh l=E0 l=E0, know what I mean, know what I mean?
4372        """)
4373
4374    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
4375        m = email.message_from_bytes(self.latin_bin_msg)
4376        self.assertEqual(str(m), self.latin_bin_msg_as7bit)
4377
4378    def test_decoded_generator_emits_unicode_body(self):
4379        m = email.message_from_bytes(self.latin_bin_msg)
4380        out = StringIO()
4381        email.generator.DecodedGenerator(out).flatten(m)
4382        #DecodedHeader output contains an extra blank line compared
4383        #to the input message.  RDM: not sure if this is a bug or not,
4384        #but it is not specific to the 8bit->7bit conversion.
4385        self.assertEqual(out.getvalue(),
4386            self.latin_bin_msg.decode('latin-1')+'\n')
4387
4388    def test_bytes_feedparser(self):
4389        bfp = email.feedparser.BytesFeedParser()
4390        for i in range(0, len(self.latin_bin_msg), 10):
4391            bfp.feed(self.latin_bin_msg[i:i+10])
4392        m = bfp.close()
4393        self.assertEqual(str(m), self.latin_bin_msg_as7bit)
4394
4395    def test_crlf_flatten(self):
4396        with openfile('msg_26.txt', 'rb') as fp:
4397            text = fp.read()
4398        msg = email.message_from_bytes(text)
4399        s = BytesIO()
4400        g = email.generator.BytesGenerator(s)
4401        g.flatten(msg, linesep='\r\n')
4402        self.assertEqual(s.getvalue(), text)
4403
4404    def test_8bit_multipart(self):
4405        # Issue 11605
4406        source = textwrap.dedent("""\
4407            Date: Fri, 18 Mar 2011 17:15:43 +0100
4408            To: foo@example.com
4409            From: foodwatch-Newsletter <bar@example.com>
4410            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
4411            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
4412            MIME-Version: 1.0
4413            Content-Type: multipart/alternative;
4414                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
4415
4416            --b1_76a486bee62b0d200f33dc2ca08220ad
4417            Content-Type: text/plain; charset="utf-8"
4418            Content-Transfer-Encoding: 8bit
4419
4420            Guten Tag, ,
4421
4422            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
4423            Nachrichten aus Japan.
4424
4425
4426            --b1_76a486bee62b0d200f33dc2ca08220ad
4427            Content-Type: text/html; charset="utf-8"
4428            Content-Transfer-Encoding: 8bit
4429
4430            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
4431                "http://www.w3.org/TR/html4/loose.dtd">
4432            <html lang="de">
4433            <head>
4434                    <title>foodwatch - Newsletter</title>
4435            </head>
4436            <body>
4437              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
4438                 die Nachrichten aus Japan.</p>
4439            </body>
4440            </html>
4441            --b1_76a486bee62b0d200f33dc2ca08220ad--
4442
4443            """).encode('utf-8')
4444        msg = email.message_from_bytes(source)
4445        s = BytesIO()
4446        g = email.generator.BytesGenerator(s)
4447        g.flatten(msg)
4448        self.assertEqual(s.getvalue(), source)
4449
4450    def test_bytes_generator_b_encoding_linesep(self):
4451        # Issue 14062: b encoding was tacking on an extra \n.
4452        m = Message()
4453        # This has enough non-ascii that it should always end up b encoded.
4454        m['Subject'] = Header('žluťoučký kůň')
4455        s = BytesIO()
4456        g = email.generator.BytesGenerator(s)
4457        g.flatten(m, linesep='\r\n')
4458        self.assertEqual(
4459            s.getvalue(),
4460            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
4461
4462    def test_generator_b_encoding_linesep(self):
4463        # Since this broke in ByteGenerator, test Generator for completeness.
4464        m = Message()
4465        # This has enough non-ascii that it should always end up b encoded.
4466        m['Subject'] = Header('žluťoučký kůň')
4467        s = StringIO()
4468        g = email.generator.Generator(s)
4469        g.flatten(m, linesep='\r\n')
4470        self.assertEqual(
4471            s.getvalue(),
4472            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
4473
    # unittest setting: None removes the length limit on the diff printed
    # when an assertion in this class fails.
    maxDiff = None
4475
4476
class BaseTestBytesGeneratorIdempotent:
    # Mixin that round-trips messages through the bytes parser and
    # BytesGenerator.  It reads `linesep`, `blinesep` and
    # `normalize_linesep_regex` from self, so concrete subclasses must
    # define them.

    # Do not truncate the diff unittest prints for a failed comparison.
    maxDiff = None

    def _msgobj(self, filename):
        # Read the sample file as bytes, replace every match of the line
        # ending regex with `blinesep`, and return the parsed message
        # together with the normalized bytes it was parsed from.
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten `msg` with maxheaderlen=0 and this class's linesep, then
        # require the generated bytes to equal `data` exactly.
        output = BytesIO()
        generator = email.generator.BytesGenerator(output, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, output.getvalue())
4493
4494
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    # Bare-LF variant: the regex matches CRLF pairs, which the base class's
    # _msgobj() replaces with `blinesep`; output is generated with `linesep`.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
4500
4501
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF variant: the regex matches LFs not already preceded by CR, which
    # the base class's _msgobj() replaces with `blinesep`; output is
    # generated with `linesep`.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
4507
4508
4509class TestBase64(unittest.TestCase):
4510    def test_len(self):
4511        eq = self.assertEqual
4512        eq(base64mime.header_length('hello'),
4513           len(base64mime.body_encode(b'hello', eol='')))
4514        for size in range(15):
4515            if   size == 0 : bsize = 0
4516            elif size <= 3 : bsize = 4
4517            elif size <= 6 : bsize = 8
4518            elif size <= 9 : bsize = 12
4519            elif size <= 12: bsize = 16
4520            else           : bsize = 20
4521            eq(base64mime.header_length('x' * size), bsize)
4522
4523    def test_decode(self):
4524        eq = self.assertEqual
4525        eq(base64mime.decode(''), b'')
4526        eq(base64mime.decode('aGVsbG8='), b'hello')
4527
4528    def test_encode(self):
4529        eq = self.assertEqual
4530        eq(base64mime.body_encode(b''), '')
4531        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
4532        # Test the binary flag
4533        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
4534        # Test the maxlinelen arg
4535        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
4536eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
4537eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
4538eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
4539eHh4eCB4eHh4IA==
4540""")
4541        # Test the eol argument
4542        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4543           """\
4544eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
4545eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
4546eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
4547eHh4eCB4eHh4IA==\r
4548""")
4549
4550    def test_header_encode(self):
4551        eq = self.assertEqual
4552        he = base64mime.header_encode
4553        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
4554        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
4555        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
4556        # Test the charset option
4557        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
4558        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
4559
4560
class TestQuopri(unittest.TestCase):
    # Tests for the quoted-printable helpers in email.quoprimime.

    def setUp(self):
        # Byte values (as integers) that don't need to be encoded in
        # headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            (c for c in b'!*+-/')))
        # Byte values (as integers) that do need to be encoded in headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Checked with a TestCase assertion rather than a bare `assert`:
        # the latter is compiled away under `python -O`, which would
        # silently stop verifying that the two lists cover all 256 values.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Byte values (as integers) that don't need to be encoded in
        # bodies: space through '~' plus tab, minus '='.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Byte values (as integers) that do need to be encoded in bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        # unquote() must undo quote() for every code point below 256.
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Helper: encode `header`, passing `charset` only when one is given
        # so that header_encode()'s own default is exercised otherwise.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        # Helper: decode `encoded_header` and compare with the expectation.
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Helper: decode `encoded`, passing `eol` only when one is given so
        # that decode()'s own default is exercised otherwise.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello    \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Helper: encode `body`, forwarding only the keyword arguments that
        # were given, compare with the expectation, and, when the result can
        # be split back into lines, check that no line exceeds maxlinelen.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_line_eol_after_non_ascii(self):
        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
                          'hello=CF=85\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values
    # in body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        self._test_encode('hello   ', 'hello  =20')

    def test_encode_one_line_trailing_spaces(self):
        self._test_encode('hello   \n', 'hello  =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello  \t', 'hello  =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello  \t\n', 'hello  =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF in the input comes out as the default eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
4879
4880
4881
4882# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # Remove the 'fake' charset that test_body_encode registers; this is
        # a no-op when it was never added.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        self.assertRaises(UnicodeError, ascii_charset.header_encode, eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        self.assertEqual('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        self.assertEqual('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        self.assertEqual('hello world',
                         ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        self.assertEqual('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        # A non-ASCII charset name is rejected with CharsetError.
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4937
4938
4939
4940# Test multilingual MIME headers.
4941class TestHeader(TestEmailBase):
4942    def test_simple(self):
4943        eq = self.ndiffAssertEqual
4944        h = Header('Hello World!')
4945        eq(h.encode(), 'Hello World!')
4946        h.append(' Goodbye World!')
4947        eq(h.encode(), 'Hello World!  Goodbye World!')
4948
4949    def test_simple_surprise(self):
4950        eq = self.ndiffAssertEqual
4951        h = Header('Hello World!')
4952        eq(h.encode(), 'Hello World!')
4953        h.append('Goodbye World!')
4954        eq(h.encode(), 'Hello World! Goodbye World!')
4955
4956    def test_header_needs_no_decoding(self):
4957        h = 'no decoding needed'
4958        self.assertEqual(decode_header(h), [(h, None)])
4959
4960    def test_long(self):
4961        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4962                   maxlinelen=76)
4963        for l in h.encode(splitchars=' ').split('\n '):
4964            self.assertLessEqual(len(l), 76)
4965
4966    def test_multilingual(self):
4967        eq = self.ndiffAssertEqual
4968        g = Charset("iso-8859-1")
4969        cz = Charset("iso-8859-2")
4970        utf8 = Charset("utf-8")
4971        g_head = (b'Die Mieter treten hier ein werden mit einem '
4972                  b'Foerderband komfortabel den Korridor entlang, '
4973                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
4974                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
4975        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
4976                   b'd\xf9vtipu.. ')
4977        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
4978                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
4979                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
4980                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
4981                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
4982                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
4983                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
4984                     '\u3044\u307e\u3059\u3002')
4985        h = Header(g_head, g)
4986        h.append(cz_head, cz)
4987        h.append(utf8_head, utf8)
4988        enc = h.encode(maxlinelen=76)
4989        eq(enc, """\
4990=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
4991 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
4992 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
4993 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
4994 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
4995 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
4996 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
4997 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
4998 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
4999 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
5000 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
5001        decoded = decode_header(enc)
5002        eq(len(decoded), 3)
5003        eq(decoded[0], (g_head, 'iso-8859-1'))
5004        eq(decoded[1], (cz_head, 'iso-8859-2'))
5005        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
5006        ustr = str(h)
5007        eq(ustr,
5008           (b'Die Mieter treten hier ein werden mit einem Foerderband '
5009            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
5010            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
5011            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
5012            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
5013            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
5014            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
5015            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
5016            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
5017            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
5018            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
5019            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
5020            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
5021            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
5022            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
5023            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
5024            ).decode('utf-8'))
5025        # Test make_header()
5026        newh = make_header(decode_header(enc))
5027        eq(newh, h)
5028
5029    def test_empty_header_encode(self):
5030        h = Header()
5031        self.assertEqual(h.encode(), '')
5032
5033    def test_header_ctor_default_args(self):
5034        eq = self.ndiffAssertEqual
5035        h = Header()
5036        eq(h, '')
5037        h.append('foo', Charset('iso-8859-1'))
5038        eq(h, 'foo')
5039
5040    def test_explicit_maxlinelen(self):
5041        eq = self.ndiffAssertEqual
5042        hstr = ('A very long line that must get split to something other '
5043                'than at the 76th character boundary to test the non-default '
5044                'behavior')
5045        h = Header(hstr)
5046        eq(h.encode(), '''\
5047A very long line that must get split to something other than at the 76th
5048 character boundary to test the non-default behavior''')
5049        eq(str(h), hstr)
5050        h = Header(hstr, header_name='Subject')
5051        eq(h.encode(), '''\
5052A very long line that must get split to something other than at the
5053 76th character boundary to test the non-default behavior''')
5054        eq(str(h), hstr)
5055        h = Header(hstr, maxlinelen=1024, header_name='Subject')
5056        eq(h.encode(), hstr)
5057        eq(str(h), hstr)
5058
5059    def test_quopri_splittable(self):
5060        eq = self.ndiffAssertEqual
5061        h = Header(charset='iso-8859-1', maxlinelen=20)
5062        x = 'xxxx ' * 20
5063        h.append(x)
5064        s = h.encode()
5065        eq(s, """\
5066=?iso-8859-1?q?xxx?=
5067 =?iso-8859-1?q?x_?=
5068 =?iso-8859-1?q?xx?=
5069 =?iso-8859-1?q?xx?=
5070 =?iso-8859-1?q?_x?=
5071 =?iso-8859-1?q?xx?=
5072 =?iso-8859-1?q?x_?=
5073 =?iso-8859-1?q?xx?=
5074 =?iso-8859-1?q?xx?=
5075 =?iso-8859-1?q?_x?=
5076 =?iso-8859-1?q?xx?=
5077 =?iso-8859-1?q?x_?=
5078 =?iso-8859-1?q?xx?=
5079 =?iso-8859-1?q?xx?=
5080 =?iso-8859-1?q?_x?=
5081 =?iso-8859-1?q?xx?=
5082 =?iso-8859-1?q?x_?=
5083 =?iso-8859-1?q?xx?=
5084 =?iso-8859-1?q?xx?=
5085 =?iso-8859-1?q?_x?=
5086 =?iso-8859-1?q?xx?=
5087 =?iso-8859-1?q?x_?=
5088 =?iso-8859-1?q?xx?=
5089 =?iso-8859-1?q?xx?=
5090 =?iso-8859-1?q?_x?=
5091 =?iso-8859-1?q?xx?=
5092 =?iso-8859-1?q?x_?=
5093 =?iso-8859-1?q?xx?=
5094 =?iso-8859-1?q?xx?=
5095 =?iso-8859-1?q?_x?=
5096 =?iso-8859-1?q?xx?=
5097 =?iso-8859-1?q?x_?=
5098 =?iso-8859-1?q?xx?=
5099 =?iso-8859-1?q?xx?=
5100 =?iso-8859-1?q?_x?=
5101 =?iso-8859-1?q?xx?=
5102 =?iso-8859-1?q?x_?=
5103 =?iso-8859-1?q?xx?=
5104 =?iso-8859-1?q?xx?=
5105 =?iso-8859-1?q?_x?=
5106 =?iso-8859-1?q?xx?=
5107 =?iso-8859-1?q?x_?=
5108 =?iso-8859-1?q?xx?=
5109 =?iso-8859-1?q?xx?=
5110 =?iso-8859-1?q?_x?=
5111 =?iso-8859-1?q?xx?=
5112 =?iso-8859-1?q?x_?=
5113 =?iso-8859-1?q?xx?=
5114 =?iso-8859-1?q?xx?=
5115 =?iso-8859-1?q?_?=""")
5116        eq(x, str(make_header(decode_header(s))))
5117        h = Header(charset='iso-8859-1', maxlinelen=40)
5118        h.append('xxxx ' * 20)
5119        s = h.encode()
5120        eq(s, """\
5121=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
5122 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
5123 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
5124 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
5125 =?iso-8859-1?q?_xxxx_xxxx_?=""")
5126        eq(x, str(make_header(decode_header(s))))
5127
5128    def test_base64_splittable(self):
5129        eq = self.ndiffAssertEqual
5130        h = Header(charset='koi8-r', maxlinelen=20)
5131        x = 'xxxx ' * 20
5132        h.append(x)
5133        s = h.encode()
5134        eq(s, """\
5135=?koi8-r?b?eHh4?=
5136 =?koi8-r?b?eCB4?=
5137 =?koi8-r?b?eHh4?=
5138 =?koi8-r?b?IHh4?=
5139 =?koi8-r?b?eHgg?=
5140 =?koi8-r?b?eHh4?=
5141 =?koi8-r?b?eCB4?=
5142 =?koi8-r?b?eHh4?=
5143 =?koi8-r?b?IHh4?=
5144 =?koi8-r?b?eHgg?=
5145 =?koi8-r?b?eHh4?=
5146 =?koi8-r?b?eCB4?=
5147 =?koi8-r?b?eHh4?=
5148 =?koi8-r?b?IHh4?=
5149 =?koi8-r?b?eHgg?=
5150 =?koi8-r?b?eHh4?=
5151 =?koi8-r?b?eCB4?=
5152 =?koi8-r?b?eHh4?=
5153 =?koi8-r?b?IHh4?=
5154 =?koi8-r?b?eHgg?=
5155 =?koi8-r?b?eHh4?=
5156 =?koi8-r?b?eCB4?=
5157 =?koi8-r?b?eHh4?=
5158 =?koi8-r?b?IHh4?=
5159 =?koi8-r?b?eHgg?=
5160 =?koi8-r?b?eHh4?=
5161 =?koi8-r?b?eCB4?=
5162 =?koi8-r?b?eHh4?=
5163 =?koi8-r?b?IHh4?=
5164 =?koi8-r?b?eHgg?=
5165 =?koi8-r?b?eHh4?=
5166 =?koi8-r?b?eCB4?=
5167 =?koi8-r?b?eHh4?=
5168 =?koi8-r?b?IA==?=""")
5169        eq(x, str(make_header(decode_header(s))))
5170        h = Header(charset='koi8-r', maxlinelen=40)
5171        h.append(x)
5172        s = h.encode()
5173        eq(s, """\
5174=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
5175 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
5176 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
5177 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
5178 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
5179 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
5180        eq(x, str(make_header(decode_header(s))))
5181
5182    def test_us_ascii_header(self):
5183        eq = self.assertEqual
5184        s = 'hello'
5185        x = decode_header(s)
5186        eq(x, [('hello', None)])
5187        h = make_header(x)
5188        eq(s, h.encode())
5189
5190    def test_string_charset(self):
5191        eq = self.assertEqual
5192        h = Header()
5193        h.append('hello', 'iso-8859-1')
5194        eq(h, 'hello')
5195
5196##    def test_unicode_error(self):
5197##        raises = self.assertRaises
5198##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
5199##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
5200##        h = Header()
5201##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
5202##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
5203##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
5204
5205    def test_utf8_shortest(self):
5206        eq = self.assertEqual
5207        h = Header('p\xf6stal', 'utf-8')
5208        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
5209        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
5210        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
5211
5212    def test_bad_8bit_header(self):
5213        raises = self.assertRaises
5214        eq = self.assertEqual
5215        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
5216        raises(UnicodeError, Header, x)
5217        h = Header()
5218        raises(UnicodeError, h.append, x)
5219        e = x.decode('utf-8', 'replace')
5220        eq(str(Header(x, errors='replace')), e)
5221        h.append(x, errors='replace')
5222        eq(str(h), e)
5223
5224    def test_escaped_8bit_header(self):
5225        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
5226        e = x.decode('ascii', 'surrogateescape')
5227        h = Header(e, charset=email.charset.UNKNOWN8BIT)
5228        self.assertEqual(str(h),
5229                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
5230        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
5231
5232    def test_header_handles_binary_unknown8bit(self):
5233        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
5234        h = Header(x, charset=email.charset.UNKNOWN8BIT)
5235        self.assertEqual(str(h),
5236                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
5237        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
5238
5239    def test_make_header_handles_binary_unknown8bit(self):
5240        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
5241        h = Header(x, charset=email.charset.UNKNOWN8BIT)
5242        h2 = email.header.make_header(email.header.decode_header(h))
5243        self.assertEqual(str(h2),
5244                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
5245        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
5246
5247    def test_modify_returned_list_does_not_change_header(self):
5248        h = Header('test')
5249        chunks = email.header.decode_header(h)
5250        chunks.append(('ascii', 'test2'))
5251        self.assertEqual(str(h), 'test')
5252
5253    def test_encoded_adjacent_nonencoded(self):
5254        eq = self.assertEqual
5255        h = Header()
5256        h.append('hello', 'iso-8859-1')
5257        h.append('world')
5258        s = h.encode()
5259        eq(s, '=?iso-8859-1?q?hello?= world')
5260        h = make_header(decode_header(s))
5261        eq(h.encode(), s)
5262
5263    def test_whitespace_keeper(self):
5264        eq = self.assertEqual
5265        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
5266        parts = decode_header(s)
5267        eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
5268        hdr = make_header(parts)
5269        eq(hdr.encode(),
5270           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
5271
5272    def test_broken_base64_header(self):
5273        raises = self.assertRaises
5274        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
5275        raises(errors.HeaderParseError, decode_header, s)
5276
5277    def test_shift_jis_charset(self):
5278        h = Header('文', charset='shift_jis')
5279        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
5280
5281    def test_flatten_header_with_no_value(self):
5282        # Issue 11401 (regression from email 4.x)  Note that the space after
5283        # the header doesn't reflect the input, but this is also the way
5284        # email 4.x behaved.  At some point it would be nice to fix that.
5285        msg = email.message_from_string("EmptyHeader:")
5286        self.assertEqual(str(msg), "EmptyHeader: \n\n")
5287
5288    def test_encode_preserves_leading_ws_on_value(self):
5289        msg = Message()
5290        msg['SomeHeader'] = '   value with leading ws'
5291        self.assertEqual(str(msg), "SomeHeader:    value with leading ws\n\n")
5292
5293    def test_whitespace_header(self):
5294        self.assertEqual(Header(' ').encode(), ' ')
5295
5296
5297
5298# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        # The title parameter of msg_29.txt is reported as a
        # (charset, language, value) triple; unquote=False leaves the
        # surrounding double quotes on the value.
        parsed = self._msgobj('msg_29.txt')
        self.assertEqual(
            parsed.get_param('title'),
            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        self.assertEqual(
            parsed.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        # set_param() with a charset (and optionally a language) is checked
        # first on a fresh Message, then on a parsed message via its
        # flattened text.
        title = 'This is even more ***fun*** isn\'t it!'
        fresh = Message()
        fresh.set_param('title', title, charset='us-ascii')
        self.ndiffAssertEqual(fresh.get_param('title'),
                              ('us-ascii', '', title))
        fresh.set_param('title', title, charset='us-ascii', language='en')
        self.ndiffAssertEqual(fresh.get_param('title'),
                              ('us-ascii', 'en', title))
        parsed = self._msgobj('msg_01.txt')
        parsed.set_param('title', title, charset='us-ascii', language='en')
        expected = """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
"""
        self.ndiffAssertEqual(parsed.as_string(maxheaderlen=78), expected)

    def test_set_param_requote(self):
        # Each case is (value, extra set_param() keywords, expected header).
        # The last case shows that a value containing tspecials is still
        # quoted even with requote=False.
        cases = (
            ('foo', {}, 'text/plain; title="foo"'),
            ('bar', {'requote': False}, 'text/plain; title=bar'),
            ("(bar)bell", {'requote': False}, 'text/plain; title="(bar)bell"'),
        )
        msg = Message()
        for value, options, expected in cases:
            msg.set_param('title', value, **options)
            self.assertEqual(msg['content-type'], expected)

    def test_del_param(self):
        # After deleting 'foo' the flattened message must still carry the
        # other Content-Type parameters.
        title = 'This is even more ***fun*** isn\'t it!'
        parsed = self._msgobj('msg_01.txt')
        parsed.set_param('foo', 'bar', charset='us-ascii', language='en')
        parsed.set_param('title', title, charset='us-ascii', language='en')
        parsed.del_param('foo', header='Content-Type')
        expected = """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
"""
        self.ndiffAssertEqual(parsed.as_string(maxheaderlen=78), expected)

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        parsed = self._msgobj('msg_32.txt')
        self.assertEqual(parsed.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        # The filename is reassembled from its segments, and flattening the
        # parsed message reproduces the input text exactly.
        source = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        parsed = email.message_from_string(source)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(source, parsed.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        # Same checks as for unquoted segments, but here the encoded
        # segments are additionally wrapped in double quotes.
        source = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        parsed = email.message_from_string(source)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(source, parsed.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        # Plain (unencoded) segments are joined into an ordinary string
        # rather than a (charset, language, value) tuple.
        source = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        parsed = email.message_from_string(source)
        name = parsed.get_param('NAME')
        self.assertNotIsInstance(name, tuple)
        self.assertEqual(
            name,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        source = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(filename,
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        source = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(filename,
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        source = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(
            filename,
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # None of the segments is marked as encoded, so the percent
        # sequences are expected to come back untouched.
        source = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(
            filename,
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        source = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        parsed = email.message_from_string(source)
        boundary = parsed.get_boundary()
        self.assertEqual(boundary,
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        source = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        parsed = email.message_from_string(source)
        charset = parsed.get_content_charset()
        self.assertEqual(charset,
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        source = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(filename,
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        source = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        parsed = email.message_from_string(source)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        charset = parsed.get_content_charset()
        self.assertEqual(charset, None)

    def test_rfc2231_bad_character_in_charset(self):
        source = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        parsed = email.message_from_string(source)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        charset = parsed.get_content_charset()
        self.assertEqual(charset, None)

    def test_rfc2231_bad_character_in_filename(self):
        # The trailing %E2 is expected to surface as U+FFFD in the filename.
        source = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(filename,
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        source = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(filename, 'myfile.txt')

    def test_rfc2231_bad_character_in_encoding(self):
        source = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=utf-8\udce2\udc80\udc9d''myfile.txt

"""
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(filename, 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in extended segments is kept as part of the
        # value; charset and language come back as None.
        source = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        parsed = email.message_from_string(source)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, None)
        self.assertEqual(language, None)
        self.assertEqual(value, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        source = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        parsed = email.message_from_string(source)
        name = parsed.get_param('name')
        self.assertNotIsInstance(name, tuple)
        self.assertEqual(name, "Frank's Document")

    def test_rfc2231_missing_tick(self):
        source = '''\
Content-Disposition: inline;
\tfilename*0*="'This%20is%20broken";
'''
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(
            filename,
            "'This is broken")

    def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
        source = '''\
Content-Disposition: inline;
\tfilename*0*="'This%20is%E2broken";
'''
        parsed = email.message_from_string(source)
        filename = parsed.get_filename()
        self.assertEqual(
            filename,
            "'This is\ufffdbroken")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        # The apostrophe inside the value must not be confused with the
        # charset'language'value delimiters of the extended syntax.
        source = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        parsed = email.message_from_string(source)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        # Without the extended-value marker the charset'language' prefix is
        # expected to stay in the value as ordinary text.
        source = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        parsed = email.message_from_string(source)
        name = parsed.get_param('name')
        self.assertNotIsInstance(name, tuple)
        self.assertEqual(name, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        source = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        parsed = email.message_from_string(source)
        name = parsed.get_param('name')
        self.assertEqual(name, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        source = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        parsed = email.message_from_string(source)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        source = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        parsed = email.message_from_string(source)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    def test_should_not_hang_on_invalid_ew_messages(self):
        # Parsing each of these messages merely has to return; nothing about
        # the resulting Message object is checked.
        invalid_encoded_words = """From: user@host.com
To: user@host.com
Bad-Header:
 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=

Hello!
"""
        replacement_chars = """From: ����� �������� <xxx@xxx>
To: "xxx" <xxx@xxx>
Subject:   ��� ���������� ����� ����� � ��������� �� ����
MIME-Version: 1.0
Content-Type: text/plain; charset="windows-1251";
Content-Transfer-Encoding: 8bit

�� ����� � ���� ������ ��� ��������
"""
        for text in (invalid_encoded_words, replacement_chars):
            with self.subTest(m=text):
                msg = email.message_from_string(text)
5717
5718
5719# Tests to ensure that signed parts of an email are completely preserved, as
5720# required by RFC1847 section 2.1.  Note that these are incomplete, because the
5721# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Matches the first boundary-delimited part of a message.  Group 1 is
    # the text following the leading "--" on a delimiter line; group 2 is
    # everything between that line and the next line made of "--" plus the
    # same group-1 text.  Compiled once for the whole class.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (original text, parsed Message) for the named data file.
        with openfile(filename, encoding="utf-8") as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Extract the first mime part of *text*.  Fail with a readable
        # message when no part is found, instead of an AttributeError from
        # calling .group() on None.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(match, 'no MIME part found in message text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # The first mime part of the regenerated text must be identical to
        # the first mime part of the original text.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5754
class TestHeaderRegistry(TestEmailBase):
    # See issue gh-93010.
    def test_HeaderRegistry(self):
        # Building a Content-Disposition header from a value containing the
        # parameter 0*00 must record an InvalidHeaderDefect as its first
        # defect.
        registry = HeaderRegistry()
        header = registry('Content-Disposition', 'attachment; 0*00="foo"')
        first_defect = header.defects[0]
        self.assertIsInstance(first_defect, errors.InvalidHeaderDefect)
5761
# Run this module's tests when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
5764