• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3# email package unit tests
4
5import re
6import time
7import base64
8import unittest
9import textwrap
10
11from io import StringIO, BytesIO
12from itertools import chain
13from random import choice
14from threading import Thread
15from unittest.mock import patch
16
17import email
18import email.policy
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator, BytesGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email.mime.nonmultipart import MIMENonMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
40from test.support import unlink, start_threads
41from test.test_email import openfile, TestEmailBase
42
43# These imports are documented to work, but we are testing them using a
44# different path, so we import them here just to make sure they are importable.
45from email.parser import FeedParser, BytesFeedParser
46
47NL = '\n'
48EMPTYSTRING = ''
49SPACE = ' '
50
51
52# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    The tests cover header access, charset and payload handling, MIME
    parameter parsing and manipulation, the content-type helpers, and
    flattening to str/bytes.  Several tests rely on the msg_NN.txt fixtures
    obtained through self._msgobj() and openfile().

    Purpose notes are written as comments rather than docstrings so that
    unittest's verbose output keeps showing the test method names.
    """

    def test_get_all(self):
        # get_all() returns every value of a repeated header, or the
        # supplied default when the header is absent.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        # set_charset() adds the MIME headers on a bare message, removes the
        # charset parameter when given None, and leaves pre-existing MIME
        # headers alone apart from appending the charset parameter.
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # A charset may be given by name instead of as a Charset instance.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # set_payload() with a charset argument records that charset.
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        # Bytes payload with the utf-8 charset ends up base64 encoded and
        # round-trips through get_payload(decode=True).
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], 'base64')
        self.assertEqual(msg.get_payload(decode=True), data)
        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        # Non-ASCII str payload with body encoding disabled is sent as 8bit.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data.decode('utf-8'), charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        # Same as above, but the payload is handed over as bytes.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_to_list(self):
        # An (empty) list is accepted as payload and returned unchanged.
        msg = Message()
        msg.set_payload([])
        self.assertEqual(msg.get_payload(), [])

    def test_attach_when_payload_is_string(self):
        # attach() must refuse to add a part when the payload is a string,
        # even if the Content-Type claims multipart.
        msg = Message()
        msg['Content-Type'] = 'multipart/mixed'
        msg.set_payload('string payload')
        sub_msg = MIMEMessage(Message())
        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
                               msg.attach, sub_msg)

    def test_get_charsets(self):
        # get_charsets() yields one entry per part; parts without a charset
        # report the failobj (None unless specified).
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        # get_filename() on the subparts of two fixture messages.
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        # Same expectation as test_get_filename, using the msg_44.txt
        # fixture.
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        # set_boundary() must keep the Content-Type header in its original
        # position and raise when there is no Content-Type header at all.
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        # get_payload(decode=True) honours each subpart's
        # Content-Transfer-Encoding and returns None for a multipart.
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        # Every accepted spelling of the uuencode transfer encoding must
        # decode the payload; undecodable data is returned unchanged.
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            with self.subTest(cte=cte):
                # Assigning a header appends a new field and lookups return
                # the first match, so the previous value has to be deleted
                # or only the first spelling would ever be exercised.
                # Deleting a missing header is a no-op.
                del msg['content-transfer-encoding']
                msg['content-transfer-encoding'] = cte
                eq(msg['content-transfer-encoding'], cte)
                eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        # Indexing into the payload of a non-multipart message is an error.
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        # DecodedGenerator output for msg_07.txt must match msg_17.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        # str(msg) reproduces the fixture text; with unixfrom=True an extra
        # leading 'From ' line is emitted in front of the same text.
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        # as_string(policy=...) must agree with a Generator that was given
        # the same policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_as_bytes(self):
        # Bytes counterpart of test_as_string.
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        # Bytes counterpart of test_as_string_policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        # A parameter without '=value' is reported with an empty value.
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        # A 'filename' parameter with no value yields an empty filename.
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        # get_params() on an arbitrary header: valueless and quoted
        # parameters, and None when the (default) header is missing.
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        # NOTE(review): the header *value* itself starts with a literal
        # 'Content-Type: ' prefix in addition to having spaces around '=';
        # confirm the prefix is intentional before changing it.
        msg['Content-Type'] = (
            'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"')
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        # get_param() for present, missing, valueless and quoted parameters.
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        # Semicolons inside a quoted value must not split the parameter.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        # Escaped quotes inside continued (bar*0 / bar*1) parameter values.
        # Both inputs are the same text, spelled with different Python
        # quoting.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        # 'in' is case-insensitive on header names but matches whole names.
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        # set_param() creates Content-Type on demand, quotes values in the
        # stored header, and can target another header.
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        # Deleting a parameter and setting it again appends it at the end.
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        # Deleting a parameter that is not present leaves the header as is.
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        # set_type() rejects a value without a slash and keeps existing
        # parameters when the type is replaced.
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    # get_content_type(): default, explicit default type, and fixtures.

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    # get_content_maintype(): same matrix as get_content_type() above.

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    # get_content_subtype(): same matrix as get_content_type() above.

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type without a slash falls back to 'text'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type without a slash falls back to 'plain'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        # replace_header() keeps header order, replaces only the first
        # matching header, and raises KeyError for an unknown header.
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_get_content_disposition(self):
        # get_content_disposition() returns None without the header and the
        # lower-cased disposition (parameters dropped) otherwise.
        msg = Message()
        self.assertIsNone(msg.get_content_disposition())
        msg.add_header('Content-Disposition', 'attachment',
                       filename='random.avi')
        self.assertEqual(msg.get_content_disposition(), 'attachment')
        msg.replace_header('Content-Disposition', 'inline')
        self.assertEqual(msg.get_content_disposition(), 'inline')
        msg.replace_header('Content-Disposition', 'InlinE')
        self.assertEqual(msg.get_content_disposition(), 'inline')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        # Invalid base64 is still decoded as far as possible, and a defect
        # is recorded on the message.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        # A non-ASCII parameter value is emitted in the
        # filename*=charset''percent-encoded form using utf-8.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        # A (charset, language, value) triple selects the charset used for
        # the encoded parameter.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        # ASCII values containing special characters are simply quoted.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        # Non-ASCII values have their special characters percent-encoded.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        # A bytes payload is quoted-printable decoded regardless of the
        # declared charset.
        for charset in ('latin-1', 'ascii'):
            with self.subTest(charset=charset):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = 'quoted-printable'
                msg.set_payload(b'foo=e6=96=87bar')
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    'get_payload returns wrong result with charset %s.'
                    % charset)

    def test_binary_base64_payload(self):
        # A bytes payload is base64 decoded regardless of the declared
        # charset.
        for charset in ('latin-1', 'ascii'):
            with self.subTest(charset=charset):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = 'base64'
                msg.set_payload(b'Zm9v5paHYmFy')
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    'get_payload returns wrong result with charset %s.'
                    % charset)

    def test_binary_uuencode_payload(self):
        # A bytes payload is uudecoded for every accepted spelling of the
        # transfer encoding, regardless of the declared charset.
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                with self.subTest(charset=charset, encoding=encoding):
                    msg = Message()
                    msg['content-type'] = 'text/plain; charset=%s' % charset
                    msg['content-transfer-encoding'] = encoding
                    msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                    # Build the message from one concatenated literal; the
                    # previous str((..., ...)) formatted a tuple repr.
                    failure_msg = ('get_payload returns wrong result '
                                   'with charset {0} and encoding {1}.'
                                   ).format(charset, encoding)
                    self.assertEqual(
                        msg.get_payload(decode=True),
                        b'foo\xe6\x96\x87bar',
                        failure_msg)

    def test_add_header_with_name_only_param(self):
        # A None parameter value emits just the name; underscores in the
        # keyword become dashes.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        # A None header value is stored as the empty string.
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embedded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embedded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))

758
759
760# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Checks which Content-Transfer-Encoding is selected, and what the
    # encoded body looks like, when MIME objects are constructed.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw_image = image_file.read()
        encoded_body = email.mime.image.MIMEImage(raw_image).get_payload()
        # No line of the transfer-encoded body may exceed 76 characters.
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        # Setting a charset on a message with no payload still records a
        # transfer encoding.
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        cte = 'content-transfer-encoding'
        # 7bit data and the default us-ascii _charset
        ascii_text = MIMEText('hello world')
        self.assertEqual(ascii_text[cte], '7bit')
        # Similar, but with 8bit data
        eight_bit_text = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit_text[cte], 'base64')
        # And now with a different charset
        latin1_text = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1_text[cte], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/plain; charset="iso-2022-jp"
            Content-Transfer-Encoding: 7bit

            \x1b$BJ8\x1b(B
            """)
        message = MIMEText('文\n', _charset='euc-jp')
        self.assertEqual(message['content-transfer-encoding'], '7bit')
        self.assertEqual(message.as_string(), expected)

    def test_qp_encode_latin1(self):
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """)
        message = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(message), expected)

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """)
        message = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(message), expected)
825
826
827# Test long header wrapping
class TestLongHeaders(TestEmailBase):
    # Checks how long header values are folded, both by Header.encode() and
    # by the Generator when a whole message is flattened.

    maxDiff = None

    def _flatten_to_str(self, msg):
        # Serialize msg through a default Generator and return the text.
        buffer = StringIO()
        Generator(buffer).flatten(msg)
        return buffer.getvalue()

    def test_split_long_continuation(self):
        # The flattened output is expected to equal the parsed input.
        text = """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
"""
        msg = email.message_from_string(text)
        self.ndiffAssertEqual(self._flatten_to_str(msg), text)

    def test_another_long_almost_unsplittable_header(self):
        hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
        # With tab continuation whitespace the encoded form equals the input.
        tabbed = Header(hstr, continuation_ws='\t')
        self.ndiffAssertEqual(tabbed.encode(), hstr)
        # Same value with every tab turned into a space.
        spaced = Header(hstr.replace('\t', ' '))
        self.ndiffAssertEqual(spaced.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")

    def test_long_nonstring(self):
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                  b'bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # One header assembled from three chunks, each in its own charset.
        h = Header(g_head, latin1, header_name='Subject')
        h.append(cz_head, latin2)
        h.append(utf8_head, utf8)
        msg = Message()
        msg['Subject'] = h
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        self.ndiffAssertEqual(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")

    def test_long_header_encode(self):
        header = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                        'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                        header_name='X-Foobar-Spoink-Defrobnit')
        self.ndiffAssertEqual(header.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
        # continuation_ws='\t' is requested, yet the expected fold still
        # starts with a space.
        header = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                        'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                        header_name='X-Foobar-Spoink-Defrobnit',
                        continuation_ws='\t')
        self.ndiffAssertEqual(header.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation(self):
        # Here the value itself contains the tab, so the fold keeps it.
        header = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
                        'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                        header_name='X-Foobar-Spoink-Defrobnit',
                        continuation_ws='\t')
        self.ndiffAssertEqual(header.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_header_encode_with_different_output_charset(self):
        header = Header('文', 'euc-jp')
        self.assertEqual(header.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")

    def test_long_header_encode_with_different_output_charset(self):
        raw = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
               b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
               b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
               b'\xa4\xa4\xde\xa4\xb9')
        header = Header(raw.decode('euc-jp'), 'euc-jp')
        expected = """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
        self.assertEqual(header.encode(), expected)

    def test_header_splitter(self):
        msg = MIMEText('')
        # It'd be great if we could use add_header() here, but that doesn't
        # guarantee an order of the parameters.
        msg['X-Foobar-Spoink-Defrobnit'] = (
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
        self.ndiffAssertEqual(self._flatten_to_str(msg), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')

    def test_no_semis_header_splitter(self):
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
        msg.set_payload('Test')
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")

    def test_last_split_chunk_does_not_fit(self):
        header = Header(
            'Subject: the first part of this is short, but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        self.ndiffAssertEqual(header.encode(), """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
        header = Header(
            ', but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        self.ndiffAssertEqual(header.encode(), """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
        header = Header(
            ', , but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        self.ndiffAssertEqual(header.encode(), """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_trailing_splitable_on_overlong_unsplitable(self):
        header = Header(
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        expected = ("this_part_does_not_fit_within_maxlinelen_and_thus_should_"
                    "be_on_a_line_all_by_itself;")
        self.ndiffAssertEqual(header.encode(), expected)

    def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
        header = Header(
            '; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(header.encode(), """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_multiple_sequential_split_chars(self):
        header = Header(
            'This is a long line that has two whitespaces  in a row.  '
            'This used to cause truncation of the header when folded')
        self.ndiffAssertEqual(header.encode(), """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded""")

    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
        header = Header(
            'thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        expected = ("thisverylongheaderhas;semicolons;and,commas,butthey;"
                    "arenotlegal;fold,points")
        self.ndiffAssertEqual(header.encode(), expected)

    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
        header = Header(
            'this is a  test where we need to have more than one line '
            'before; our final line that is just too big to fit;; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        self.ndiffAssertEqual(header.encode(), """\
this is a  test where we need to have more than one line before;
 our final line that is just too big to fit;;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")

    def test_overlong_last_part_followed_by_split_point(self):
        header = Header(
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself ')
        expected = ("this_part_does_not_fit_within_maxlinelen_and_thus_"
                    "should_be_on_a_line_all_by_itself ")
        self.ndiffAssertEqual(header.encode(), expected)

    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
        header = Header(
            'this_is_a__test_where_we_need_to_have_more_than_one_line_'
            'before_our_final_line_; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(header.encode(), """\
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
        header = Header(
            'this is a test where we need to have more than one line '
            'before our final line; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(header.encode(), """\
this is a test where we need to have more than one line before our final line;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_whitespace_runs(self):
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
        msg.set_payload('Test')
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")

    def test_long_run_with_semi_header_splitter(self):
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
        msg.set_payload('Test')
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")

    def test_splitter_split_on_punctuation_only_if_fws(self):
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
                             'they;arenotlegal;fold,points')
        msg.set_payload('Test')
        # XXX the space after the header should not be there.
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
From: test@dom.ain
References:\x20
 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points

Test""")

    def test_no_split_long_header(self):
        hstr = 'References: ' + 'x' * 80
        with_name = Header(hstr)
        # These come on two lines because Headers are really field value
        # classes and don't really know about their field names.
        self.ndiffAssertEqual(with_name.encode(), """\
References:
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
        bare = Header('x' * 80)
        self.ndiffAssertEqual(bare.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

    def test_splitting_multiple_long_lines(self):
        hstr = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
        header = Header(hstr, continuation_ws='\t')
        self.ndiffAssertEqual(header.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")

    def test_splitting_first_line_only_is_long(self):
        hstr = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
        header = Header(hstr, maxlinelen=78, header_name='Received',
                        continuation_ws='\t')
        self.ndiffAssertEqual(header.encode(), """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")

    def test_long_8bit_header(self):
        header = Header('Britische Regierung gibt', 'iso-8859-1',
                        header_name='Subject')
        header.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
        msg = Message()
        msg['Subject'] = header
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
        # maxheaderlen=0: the expected output is a single unfolded line.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")

    def test_long_8bit_header_no_charset(self):
        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                         'f\xfcr Offshore-Windkraftprojekte '
                         '<a-very-long-address@example.com>')
        # The same output is expected whether the value is assigned as a str
        # or wrapped in a Header first.
        expected = """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

"""
        from_str = Message()
        from_str['Reply-To'] = header_string
        self.ndiffAssertEqual(from_str.as_string(maxheaderlen=78), expected)
        from_header = Message()
        from_header['Reply-To'] = Header(header_string,
                                         header_name='Reply-To')
        self.ndiffAssertEqual(from_header.as_string(maxheaderlen=78), expected)

    def test_long_to_header(self):
        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
              '<someone@eecs.umich.edu>, '
              '"Someone Test #B" <someone@umich.edu>, '
              '"Someone Test #C" <someone@eecs.umich.edu>, '
              '"Someone Test #D" <someone@eecs.umich.edu>')
        msg = Message()
        msg['To'] = to
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')

    def test_long_line_after_append(self):
        s = 'This is an example of string which has almost the limit of header length.'
        header = Header(s)
        header.append('Add another line.')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
 Add another line.""")

    def test_shorter_line_with_append(self):
        header = Header('This is a shorter line.')
        header.append('Add another sentence. (Surprise?)')
        self.ndiffAssertEqual(
            header.encode(),
            'This is a shorter line. Add another sentence. (Surprise?)')

    def test_long_field_name(self):
        fn = 'X-Very-Very-Very-Long-Header-Name'
        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
        header = Header(gs, 'iso-8859-1', header_name=fn)
        # BAW: this seems broken because the first line is too long
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")

    def test_long_received_header(self):
        value = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
                 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
                 'Wed, 05 Mar 2003 18:10:18 -0700')
        msg = Message()
        msg['Received-1'] = Header(value, continuation_ws='\t')
        msg['Received-2'] = value
        # This should be splitting on spaces not semicolons.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")

    def test_string_headerinst_eq(self):
        value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
                 'tu-muenchen.de> (David Bremner\'s message of '
                 '"Thu, 6 Mar 2003 13:58:21 +0100")')
        msg = Message()
        msg['Received-1'] = Header(value, header_name='Received-1',
                                   continuation_ws='\t')
        msg['Received-2'] = value
        # XXX The space after the ':' should not be there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Received-2:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

""")

    def test_long_unbreakable_lines_with_continuation(self):
        t = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        msg = Message()
        msg['Face-1'] = t
        msg['Face-2'] = Header(t, header_name='Face-2')
        msg['Face-3'] = ' ' + t
        # XXX This splitting is all wrong.  Either the first value line
        # should be snug against the field name, or the space after the
        # header name should not be there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")

    def test_another_long_multiline_header(self):
        raw = ('Received: from siimage.com '
               '([172.25.1.3]) by zima.siliconimage.com with '
               'Microsoft SMTPSVC(5.0.2195.4905); '
               'Wed, 16 Oct 2002 07:41:11 -0700')
        msg = email.message_from_string(raw)
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

''')

    def test_long_lines_with_different_header(self):
        value = ('List-Unsubscribe: '
                 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
                 '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
                 '?subject=unsubscribe>')
        msg = Message()
        # Assigned twice on purpose: the expected output below contains two
        # List headers, one set from the str and one from the Header.
        msg['List'] = value
        msg['List'] = Header(value, header_name='List')
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")

    def test_long_rfc2047_header_with_embedded_fws(self):
        source = textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
               folding white space works""")
        expected = textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")
        header = Header(source, charset='utf-8', header_name='Test')
        self.assertEqual(header.encode()+'\n', expected+'\n')
1397
1398
1399
1400# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Checks when the generators prefix body lines beginning with "From "
    # with '>'.

    def setUp(self):
        # A one-header message whose body starts with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        out = StringIO()
        generator = Generator(out, mangle_from_=True)
        msg = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        generator.flatten(msg)
        # Exactly two output lines are expected to carry the '>' prefix.
        mangled = sum(1 for line in out.getvalue().split('\n')
                      if line.startswith('>From '))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

        """).encode('utf-8')
        msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(msg)
        self.assertEqual(out.getvalue(), source + b'>From R\xc3\xb6lli\n')

    def test_mutltipart_with_bad_bytes_in_cte(self):
        # bpo30835
        source = textwrap.dedent("""\
            From: aperson@example.com
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        # There is no assertion; the test only requires that parsing
        # completes without raising.
        email.message_from_bytes(source)
1477
1478
1479# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    """Basic behaviour of MIMEAudio built from the audiotest.au fixture."""

    def setUp(self):
        with openfile('audiotest.au', 'rb') as stream:
            self._audiodata = stream.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        """The subtype is guessed from the audio data itself."""
        content_type = self._au.get_content_type()
        self.assertEqual(content_type, 'audio/basic')

    def test_encoding(self):
        """The stored payload is base64 text that decodes to the input bytes."""
        encoded = self._au.get_payload()
        decoded = base64.decodebytes(bytes(encoded, 'ascii'))
        self.assertEqual(decoded, self._audiodata)

    def test_checkSetMinor(self):
        """An explicitly supplied subtype is used as given."""
        custom = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(custom.get_content_type(), 'audio/fish')

    def test_add_header(self):
        """add_header() parameters are readable through get_param(s)."""
        part = self._au
        disposition = 'content-disposition'
        # Unique object so identity checks prove the failobj was returned.
        sentinel = []
        part.add_header('Content-Disposition', 'attachment',
                        filename='audiotest.au')
        self.assertEqual(part[disposition],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(part.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(part.get_param('filename', header=disposition),
                         'audiotest.au')
        self.assertEqual(part.get_param('attachment', header=disposition), '')
        self.assertIs(part.get_param('foo', failobj=sentinel,
                                     header=disposition), sentinel)
        # Lookups that cannot succeed: unknown parameter, unknown header.
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(part.get_param('attachment', sentinel,
                                     header='foobar'), sentinel)
1516
1517
1518
1519# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    """Basic behaviour of MIMEImage built from the PyBanner048.gif fixture."""

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as stream:
            self._imgdata = stream.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        """The subtype is guessed from the image data itself."""
        content_type = self._im.get_content_type()
        self.assertEqual(content_type, 'image/gif')

    def test_encoding(self):
        """The stored payload is base64 text that decodes to the input bytes."""
        encoded = self._im.get_payload()
        decoded = base64.decodebytes(bytes(encoded, 'ascii'))
        self.assertEqual(decoded, self._imgdata)

    def test_checkSetMinor(self):
        """An explicitly supplied subtype is used as given."""
        custom = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(custom.get_content_type(), 'image/fish')

    def test_add_header(self):
        """add_header() parameters are readable through get_param(s)."""
        part = self._im
        disposition = 'content-disposition'
        # Unique object so identity checks prove the failobj was returned.
        sentinel = []
        part.add_header('Content-Disposition', 'attachment',
                        filename='dingusfish.gif')
        self.assertEqual(part[disposition],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(part.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(part.get_param('filename', header=disposition),
                         'dingusfish.gif')
        self.assertEqual(part.get_param('attachment', header=disposition), '')
        self.assertIs(part.get_param('foo', failobj=sentinel,
                                     header=disposition), sentinel)
        # Lookups that cannot succeed: unknown parameter, unknown header.
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(part.get_param('attachment', sentinel,
                                     header='foobar'), sentinel)
1556
1557
1558
1559# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Basic behaviour of MIMEApplication with each of the stock encoders.

    The encoder tests build a message from raw bytes, check its payload,
    then flatten it with BytesGenerator and parse the bytes back.  After
    that round trip they verify both that the original message was left
    untouched and that the re-parsed copy exposes the same payload.
    """

    @staticmethod
    def _roundtrip(msg):
        """Flatten *msg* to bytes and return the message parsed from them."""
        buf = BytesIO()
        BytesGenerator(buf).flatten(msg)
        return email.message_from_bytes(buf.getvalue())

    def test_headers(self):
        """The defaults are application/octet-stream with a base64 CTE."""
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        """The default encoder stores base64 text that decodes to the input."""
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        # Treated as a string, this will be invalid code points.
        as_text = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # Flattening must not have altered the source message ...
        self.assertEqual(msg.get_payload(), as_text)
        # ... and the re-parsed copy must present the same payload.
        self.assertEqual(msg2.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        # Treated as a string, this will be invalid code points.
        as_text = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Source untouched by flattening; re-parsed copy agrees with it.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        # The leading \x0b is a line boundary for str.splitlines() but must
        # not be treated as one when the payload is written out.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        encoded = '=FA=FB=FC=FD=FE=FF=20'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # Source untouched by flattening; re-parsed copy agrees with it.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        """Explicit base64 encoder: payload survives a bytes round trip."""
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        encoded = '+vv8/f7/\n'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Source untouched by flattening; re-parsed copy agrees with it.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1649
1650
1651# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Basic behaviour of MIMEText: content type, payload and charsets."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        """Default type is text/plain with a us-ascii charset parameter."""
        msg = self._msg
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        # Unique object so identity checks prove the failobj was returned.
        sentinel = []
        self.assertIs(msg.get_param('foobar', sentinel), sentinel)
        self.assertIs(msg.get_param('charset', sentinel, header='foobar'),
                      sentinel)

    def test_payload(self):
        """The text is stored as a plain, non-multipart payload."""
        msg = self._msg
        self.assertEqual(msg.get_payload(), 'hello there')
        self.assertFalse(msg.is_multipart())

    def test_charset(self):
        """_charset accepts both a charset name and a Charset instance."""
        by_name = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(by_name.get_charset().input_charset, 'us-ascii')
        self.assertEqual(by_name['content-type'],
                         'text/plain; charset="us-ascii"')
        # Also accept a Charset instance
        utf8 = Charset('utf-8')
        utf8.body_encoding = None
        by_instance = MIMEText('hello there', _charset=utf8)
        self.assertEqual(by_instance.get_charset().input_charset, 'utf-8')
        self.assertEqual(by_instance['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(by_instance.get_payload(), 'hello there')

    def test_7bit_input(self):
        """ASCII text with an explicit us-ascii charset keeps that charset."""
        msg = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        """ASCII text without a charset argument ends up as us-ascii."""
        msg = MIMEText('hello there')
        self.assertEqual(msg.get_charset(), 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        """Non-ASCII text with _charset='utf-8' decodes to its utf-8 bytes."""
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        msg = MIMEText(teststr, _charset='utf-8')
        self.assertEqual(msg.get_charset().output_charset, 'utf-8')
        self.assertEqual(msg['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(msg.get_payload(decode=True),
                         teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        """Skipped: non-ASCII text without a charset would have to raise."""
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1708
1709
1710
1711# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Generation and parsing of multipart/* messages.

    Covers the exact text produced for containers with zero or one part
    under the various preamble/epilogue settings, plus parsing of messages
    with unusual or malformed boundaries.
    """

    def setUp(self):
        """Build a multipart/mixed message holding a text intro and a GIF."""
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        # Let the library render the RFC 2822 date in local time.  The zone
        # suffix used to be computed by hand as "tzsecs / 36", which yields
        # a doubled sign east of UTC ("+-100") and wrong minutes for zones
        # that are not a whole number of hours from UTC; strftime()'s day
        # and month names are also locale-dependent.
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        """Return an empty multipart/mixed container with the stock headers."""
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        """The container exposes exactly the two attached parts, in order."""
        m = self._msg
        self.assertTrue(m.is_multipart())
        self.assertEqual(m.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        self.assertEqual(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        """Parsing then regenerating an empty multipart reproduces the text."""
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--
''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._make_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        """message/external-body parts each wrap a single text/plain part."""
        msg = self._msgobj('msg_36.txt')
        self.assertEqual(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        self.assertEqual(msg1.get_content_type(), 'multipart/alternative')
        self.assertEqual(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            self.assertEqual(subpart.get_content_type(),
                             'message/external-body')
            self.assertEqual(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            self.assertEqual(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        """A boundary parameter on a non-multipart type is left as body text."""
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        """Leading spaces inside a quoted boundary are significant."""
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="    XXXX"

--    XXXX
Content-Type: text/plain


--    XXXX
Content-Type: text/plain

--    XXXX--
''')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_boundary(), '    XXXX')
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        """A closing boundary at end of input needs no trailing newline."""
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

    def test_mimebase_default_policy(self):
        m = MIMEBase('multipart', 'mixed')
        self.assertIs(m.policy, email.policy.compat32)

    def test_mimebase_custom_policy(self):
        m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
        self.assertIs(m.policy, email.policy.default)
2095
2096# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing of badly formed messages and the defects recorded for them."""

    def _assert_missing_boundary_defects(self, msg):
        """Check for the defect pair left by a multipart with no boundary."""
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_parse_missing_minor_type(self):
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        outer = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = outer.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self._assert_missing_boundary_defects(msg)

    # Template for the multipart CTE tests below; the "{}" slot directly
    # after the boundary parameter receives an optional extra header line.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        source = self.multipart_msg.format(
            "\nContent-Transfer-Encoding: base64")
        msg = self._str_msg(source)
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        source = self.multipart_msg.format('')
        msg = self._str_msg(source)
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            extra_header = "\nContent-Transfer-Encoding: {}".format(cte)
            msg = self._str_msg(self.multipart_msg.format(extra_header))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        msg = Message()
        # RFC 2045, section 5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self._assert_missing_boundary_defects(msg)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        rfc822_part = outer.get_payload(1)
        bad = rfc822_part.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        source = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(source)
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'body')
        self.assertEqual(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        self.assertEqual(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        source = 'Subject: test\nnot a header\nTo: abc\n\nb\n'
        msg = self._str_msg(source)
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2269
2270
2271# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Each test feeds a header value to decode_header() and checks the list
    # of (bytes, charset) chunks it returns; several also rebuild a Header
    # with make_header() and check its str() and/or encode() form.

    def test_rfc2047_multiline(self):
        # Encoded words split over a folded (two line) header value.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        # The space between the encoded word and the plain text is kept.
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b' Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The ', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b' jumped over the ', None),
            (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued directly onto the surrounding plain text.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        # Same words as above, but separated by spaces that must survive.
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm ', None),
            (b'\xf6', 'iso-8859-1'),
            (b' rg ', None),
            (b'\xe5', 'iso-8859-1'),
            (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # Only payloads that decode to complete bytes are exercised.
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, decoded in cases:
            # subTest keeps the loop going after a failure and names the
            # payload that failed in the report.
            with self.subTest(encoded=encoded):
                self.assertEqual(decode_header(template % encoded),
                                 [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?=)'
        expected = [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= b)'
        expected = [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        expected = [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        expected = [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        raw = '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)'
        expected = [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        raw = '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)'
        expected = [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a_b?=)'
        expected = [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(raw), [
            (b'(', None),
            (b'a', 'iso-8859-1'),
            (b' b', 'iso-8859-2'),
            (b')', None)])
        # Re-encoding gives back the input, apart from letter case.
        self.assertEqual(make_header(decode_header(raw)).encode(),
                         raw.lower())
        self.assertEqual(str(make_header(decode_header(raw))), '(a b)')

    def test_multiline_header(self):
        raw = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(raw), [
            (b'"M\xfcller T"', 'windows-1252'),
            (b'<T.Mueller@xxx.com>', None)])
        # The re-encoded header is the input with its line break removed.
        self.assertEqual(make_header(decode_header(raw)).encode(),
                         ''.join(raw.splitlines()))
        self.assertEqual(str(make_header(decode_header(raw))),
                         '"Müller T" <T.Mueller@xxx.com>')
2398
2399
2400# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        # Keep the raw text of the msg_11.txt sample on the instance.
        with openfile('msg_11.txt') as sample:
            self._text = sample.read()

    def _two_part_mixed(self, epilogue):
        # Build the multipart/mixed message shared by the preamble/epilogue
        # tests: three headers, a preamble, the given epilogue and two
        # text/plain parts separated by the boundary 'BOUNDARY'.
        outer = Message()
        outer['From'] = 'aperson@dom.ain'
        outer['To'] = 'bperson@dom.ain'
        outer['Subject'] = 'Test'
        outer.preamble = 'MIME message'
        outer.epilogue = epilogue
        first = MIMEText('One')
        second = MIMEText('Two')
        outer.add_header('Content-Type', 'multipart/mixed',
                         boundary='BOUNDARY')
        outer.attach(first)
        outer.attach(second)
        return outer

    def _check_digest_default_types(self, filename):
        # Parse a two-part sample file and check that both top-level parts
        # report message/rfc822 and that the message inside each reports
        # text/plain, for the default type and the content type alike.
        with openfile(filename) as fp:
            digest = email.message_from_file(fp)
        wrappers = []
        for index in (0, 1):
            wrapper = digest.get_payload(index)
            self.assertEqual(wrapper.get_default_type(), 'message/rfc822')
            self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
            wrappers.append(wrapper)
        for wrapper in wrappers:
            inner = wrapper.get_payload(0)
            self.assertEqual(inner.get_default_type(), 'text/plain')
            self.assertEqual(inner.get_content_type(), 'text/plain')

    def test_type_error(self):
        # A plain string is not an acceptable thing to encapsulate.
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        parts = wrapper.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        only = parts[0]
        # The payload holds the very object that was passed in.
        self.assertIs(only, inner)
        self.assertEqual(only['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must be refused.
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        with self.assertRaises(errors.MultipartConversionError):
            wrapper.attach(second)

    def test_generate(self):
        # Build the message that will be encapsulated ...
        enclosed = Message()
        enclosed['Subject'] = 'An enclosed message'
        enclosed.set_payload('Here is the body of the message.\n')
        # ... wrap it, and flatten the wrapper.
        wrapper = MIMEMessage(enclosed)
        wrapper['Subject'] = 'The enclosing message'
        out = StringIO()
        Generator(out).flatten(wrapper)
        self.assertEqual(out.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        parsed = self._msgobj('msg_11.txt')
        self.assertEqual(parsed.get_content_type(), 'message/rfc822')
        parts = parsed.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed['subject'], 'An enclosed message')
        self.assertEqual(enclosed.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        # msg 16 is a Delivery Status Notification, see RFC 1894
        report = self._msgobj('msg_16.txt')
        self.assertEqual(report.get_content_type(), 'multipart/report')
        self.assertTrue(report.is_multipart())
        self.assertEqual(len(report.get_payload()), 3)
        # Part 1: the human readable text/plain section.
        human = report.get_payload(0)
        self.assertEqual(human.get_content_type(), 'text/plain')
        self.assertEqual(human.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Part 2: the machine parsable DSN information.  It is made of two
        # blocks of headers, each represented by its own nested Message.
        status = report.get_payload(1)
        self.assertEqual(status.get_content_type(),
                         'message/delivery-status')
        self.assertEqual(len(status.get_payload()), 2)
        # First header block.
        block1 = status.get_payload(0)
        self.assertIsInstance(block1, Message)
        self.assertEqual(block1['original-envelope-id'],
                         '0GK500B4HD0888@cougar.noc.ucla.edu')
        self.assertEqual(
            block1.get_param('dns', header='reporting-mta'), '')
        # A parameter that is not there comes back as None.
        self.assertEqual(
            block1.get_param('nsd', header='reporting-mta'), None)
        # Second header block.
        block2 = status.get_payload(1)
        self.assertIsInstance(block2, Message)
        self.assertEqual(block2['action'], 'failed')
        self.assertEqual(
            block2.get_params(header='original-recipient'),
            [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        self.assertEqual(
            block2.get_param('rfc822', header='final-recipient'), '')
        # Part 3: the original message.
        original = report.get_payload(2)
        self.assertEqual(original.get_content_type(), 'message/rfc822')
        parts = original.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        self.assertEqual(enclosed['message-id'],
                         '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # The generated text must match the msg_21.txt sample exactly.
        with openfile('msg_21.txt') as fp:
            expected = fp.read()
        outer = self._two_part_mixed('End of MIME message\n')
        out = StringIO()
        Generator(out).flatten(outer)
        self.ndiffAssertEqual(out.getvalue(), expected)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        outer = self._two_part_mixed('')
        self.ndiffAssertEqual(outer.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        # Build the digest container by hand instead of parsing one.
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Two text parts, each wrapped in its own message/rfc822 part.
        text_one = MIMEText('message 1\n')
        text_two = MIMEText('message 2\n')
        wrapped = (MIMEMessage(text_one), MIMEMessage(text_two))
        for part in wrapped:
            container.attach(part)
        for part in wrapped:
            self.assertEqual(part.get_content_type(), 'message/rfc822')
            self.assertEqual(part.get_default_type(), 'message/rfc822')
        self.ndiffAssertEqual(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the wrappers must still report
        # message/rfc822 through their default type.
        for part in wrapped:
            del part['content-type']
            del part['mime-version']
        for part in wrapped:
            self.assertEqual(part.get_content_type(), 'message/rfc822')
            self.assertEqual(part.get_default_type(), 'message/rfc822')
        self.ndiffAssertEqual(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts handed to the constructor end up in the payload, in order.
        first = MIMEText('')
        second = MIMEText('')
        outer = MIMEMultipart(_subparts=(first, second))
        self.assertEqual(len(outer.get_payload()), 2)
        self.assertEqual(outer.get_payload(0), first)
        self.assertEqual(outer.get_payload(1), second)

    def test_default_multipart_constructor(self):
        self.assertTrue(MIMEMultipart().is_multipart())

    def test_multipart_default_policy(self):
        # Without an explicit policy, assigning 'To' twice keeps both values.
        outer = MIMEMultipart()
        outer['To'] = 'a@b.com'
        outer['To'] = 'c@d.com'
        self.assertEqual(outer.get_all('to'), ['a@b.com', 'c@d.com'])

    def test_multipart_custom_policy(self):
        # With email.policy.default a second 'To' header is rejected.
        outer = MIMEMultipart(policy=email.policy.default)
        outer['To'] = 'a@b.com'
        with self.assertRaises(ValueError) as caught:
            outer['To'] = 'c@d.com'
        self.assertEqual(str(caught.exception),
                         'There may be at most 1 To headers in a message')
2709
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then, without touching the tree,
# regenerate the plain text.  The original text and the regenerated text
# should be identical.  Note that we ignore the Unix-From line by default,
# since it may contain a changed date.
class TestIdempotent(TestEmailBase):

    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed
        # from, so a test can compare the two after regeneration.
        with openfile(filename) as fp:
            source = fp.read()
        return email.message_from_string(source), source

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the result to equal text.
        buffer = StringIO()
        Generator(buffer, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, buffer.getvalue())

    def test_parse_text_message(self):
        parsed, source = self._msgobj('msg_01.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')
        self.assertEqual(parsed.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(parsed.get_param('charset'), 'us-ascii')
        self.assertEqual(parsed.preamble, None)
        self.assertEqual(parsed.epilogue, None)
        self._idempotent(parsed, source)

    def test_parse_untyped_message(self):
        parsed, source = self._msgobj('msg_03.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_params(), None)
        self.assertEqual(parsed.get_param('charset'), None)
        self._idempotent(parsed, source)

    # The tests below are plain round trips: parse the sample file, flatten
    # it again, and compare against the source text.

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This sample is regenerated with its Unix-From line included.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        report, _ = self._msgobj('msg_05.txt')
        self.assertEqual(report.get_content_type(), 'multipart/report')
        # Check the Content-Type parameters.
        params = dict(report.get_params())
        self.assertEqual(params['report-type'], 'delivery-status')
        self.assertEqual(params['boundary'],
                         'D1690A7AC1.996856090/mail.example.com')
        self.assertEqual(
            report.preamble,
            'This is a MIME-encapsulated message.' + self.linesep)
        self.assertEqual(report.epilogue, self.linesep)
        self.assertEqual(len(report.get_payload()), 3)
        # Check that the subparts are what we expect.
        body = 'Yadda yadda yadda' + self.linesep
        part1 = report.get_payload(0)
        self.assertEqual(part1.get_content_type(), 'text/plain')
        self.assertEqual(part1.get_payload(), body)
        part2 = report.get_payload(1)
        self.assertEqual(part2.get_content_type(), 'text/plain')
        self.assertEqual(part2.get_payload(), body)
        part3 = report.get_payload(2)
        self.assertEqual(part3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(part3, Message)
        nested = part3.get_payload()
        self.assertIsInstance(nested, list)
        self.assertEqual(len(nested), 1)
        enclosed = nested[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_payload(), body)

    def test_parser(self):
        parsed, _ = self._msgobj('msg_06.txt')
        # Check the outer content type.
        self.assertEqual(parsed.get_content_type(), 'message/rfc822')
        # The payload must be a list holding exactly one sub-Message, and
        # that sub-message must be text/plain.
        nested = parsed.get_payload()
        self.assertIsInstance(nested, list)
        self.assertEqual(len(nested), 1)
        enclosed = nested[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        self.assertIsInstance(enclosed.get_payload(), str)
        self.assertEqual(enclosed.get_payload(), self.linesep)
2870
2871
2872
2873# Test various other bits of the package's functionality
2874class TestMiscellaneous(TestEmailBase):
2875    def test_message_from_string(self):
2876        with openfile('msg_01.txt') as fp:
2877            text = fp.read()
2878        msg = email.message_from_string(text)
2879        s = StringIO()
2880        # Don't wrap/continue long headers since we're trying to test
2881        # idempotency.
2882        g = Generator(s, maxheaderlen=0)
2883        g.flatten(msg)
2884        self.assertEqual(text, s.getvalue())
2885
2886    def test_message_from_file(self):
2887        with openfile('msg_01.txt') as fp:
2888            text = fp.read()
2889            fp.seek(0)
2890            msg = email.message_from_file(fp)
2891            s = StringIO()
2892            # Don't wrap/continue long headers since we're trying to test
2893            # idempotency.
2894            g = Generator(s, maxheaderlen=0)
2895            g.flatten(msg)
2896            self.assertEqual(text, s.getvalue())
2897
2898    def test_message_from_string_with_class(self):
2899        with openfile('msg_01.txt') as fp:
2900            text = fp.read()
2901
2902        # Create a subclass
2903        class MyMessage(Message):
2904            pass
2905
2906        msg = email.message_from_string(text, MyMessage)
2907        self.assertIsInstance(msg, MyMessage)
2908        # Try something more complicated
2909        with openfile('msg_02.txt') as fp:
2910            text = fp.read()
2911        msg = email.message_from_string(text, MyMessage)
2912        for subpart in msg.walk():
2913            self.assertIsInstance(subpart, MyMessage)
2914
2915    def test_message_from_file_with_class(self):
2916        # Create a subclass
2917        class MyMessage(Message):
2918            pass
2919
2920        with openfile('msg_01.txt') as fp:
2921            msg = email.message_from_file(fp, MyMessage)
2922        self.assertIsInstance(msg, MyMessage)
2923        # Try something more complicated
2924        with openfile('msg_02.txt') as fp:
2925            msg = email.message_from_file(fp, MyMessage)
2926        for subpart in msg.walk():
2927            self.assertIsInstance(subpart, MyMessage)
2928
2929    def test_custom_message_does_not_require_arguments(self):
2930        class MyMessage(Message):
2931            def __init__(self):
2932                super().__init__()
2933        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2934        self.assertIsInstance(msg, MyMessage)
2935
2936    def test__all__(self):
2937        module = __import__('email')
2938        self.assertEqual(sorted(module.__all__), [
2939            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2940            'generator', 'header', 'iterators', 'message',
2941            'message_from_binary_file', 'message_from_bytes',
2942            'message_from_file', 'message_from_string', 'mime', 'parser',
2943            'quoprimime', 'utils',
2944            ])
2945
2946    def test_formatdate(self):
2947        now = time.time()
2948        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2949                         time.gmtime(now)[:6])
2950
2951    def test_formatdate_localtime(self):
2952        now = time.time()
2953        self.assertEqual(
2954            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2955            time.localtime(now)[:6])
2956
2957    def test_formatdate_usegmt(self):
2958        now = time.time()
2959        self.assertEqual(
2960            utils.formatdate(now, localtime=False),
2961            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2962        self.assertEqual(
2963            utils.formatdate(now, localtime=False, usegmt=True),
2964            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2965
2966    # parsedate and parsedate_tz will become deprecated interfaces someday
2967    def test_parsedate_returns_None_for_invalid_strings(self):
2968        self.assertIsNone(utils.parsedate(''))
2969        self.assertIsNone(utils.parsedate_tz(''))
2970        self.assertIsNone(utils.parsedate('0'))
2971        self.assertIsNone(utils.parsedate_tz('0'))
2972        self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2973        self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2974        # Not a part of the spec but, but this has historically worked:
2975        self.assertIsNone(utils.parsedate(None))
2976        self.assertIsNone(utils.parsedate_tz(None))
2977
2978    def test_parsedate_compact(self):
2979        # The FWS after the comma is optional
2980        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2981                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2982
2983    def test_parsedate_no_dayofweek(self):
2984        eq = self.assertEqual
2985        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2986           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2987
2988    def test_parsedate_compact_no_dayofweek(self):
2989        eq = self.assertEqual
2990        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2991           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2992
2993    def test_parsedate_no_space_before_positive_offset(self):
2994        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2995           (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2996
2997    def test_parsedate_no_space_before_negative_offset(self):
2998        # Issue 1155362: we already handled '+' for this case.
2999        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
3000           (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
3001
3002
3003    def test_parsedate_accepts_time_with_dots(self):
3004        eq = self.assertEqual
3005        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
3006           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3007        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
3008           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
3009
3010    def test_parsedate_acceptable_to_time_functions(self):
3011        eq = self.assertEqual
3012        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
3013        t = int(time.mktime(timetup))
3014        eq(time.localtime(t)[:6], timetup[:6])
3015        eq(int(time.strftime('%Y', timetup)), 2003)
3016        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
3017        t = int(time.mktime(timetup[:9]))
3018        eq(time.localtime(t)[:6], timetup[:6])
3019        eq(int(time.strftime('%Y', timetup[:9])), 2003)
3020
3021    def test_mktime_tz(self):
3022        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3023                                          -1, -1, -1, 0)), 0)
3024        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3025                                          -1, -1, -1, 1234)), -1234)
3026
3027    def test_parsedate_y2k(self):
3028        """Test for parsing a date with a two-digit year.
3029
3030        Parsing a date with a two-digit year should return the correct
3031        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
3032        obsoletes RFC822) requires four-digit years.
3033
3034        """
3035        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
3036                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
3037        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
3038                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
3039
3040    def test_parseaddr_empty(self):
3041        self.assertEqual(utils.parseaddr('<>'), ('', ''))
3042        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
3043
3044    def test_parseaddr_multiple_domains(self):
3045        self.assertEqual(
3046            utils.parseaddr('a@b@c'),
3047            ('', '')
3048        )
3049        self.assertEqual(
3050            utils.parseaddr('a@b.c@c'),
3051            ('', '')
3052        )
3053        self.assertEqual(
3054            utils.parseaddr('a@172.17.0.1@c'),
3055            ('', '')
3056        )
3057
3058    def test_noquote_dump(self):
3059        self.assertEqual(
3060            utils.formataddr(('A Silly Person', 'person@dom.ain')),
3061            'A Silly Person <person@dom.ain>')
3062
3063    def test_escape_dump(self):
3064        self.assertEqual(
3065            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
3066            r'"A (Very) Silly Person" <person@dom.ain>')
3067        self.assertEqual(
3068            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
3069            ('A (Very) Silly Person', 'person@dom.ain'))
3070        a = r'A \(Special\) Person'
3071        b = 'person@dom.ain'
3072        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3073
3074    def test_escape_backslashes(self):
3075        self.assertEqual(
3076            utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
3077            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
3078        a = r'Arthur \Backslash\ Foobar'
3079        b = 'person@dom.ain'
3080        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3081
3082    def test_quotes_unicode_names(self):
3083        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3084        name = "H\u00e4ns W\u00fcrst"
3085        addr = 'person@dom.ain'
3086        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3087        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
3088        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
3089        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
3090            latin1_quopri)
3091
3092    def test_accepts_any_charset_like_object(self):
3093        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3094        name = "H\u00e4ns W\u00fcrst"
3095        addr = 'person@dom.ain'
3096        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3097        foobar = "FOOBAR"
3098        class CharsetMock:
3099            def header_encode(self, string):
3100                return foobar
3101        mock = CharsetMock()
3102        mock_expected = "%s <%s>" % (foobar, addr)
3103        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
3104        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
3105            utf8_base64)
3106
3107    def test_invalid_charset_like_object_raises_error(self):
3108        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3109        name = "H\u00e4ns W\u00fcrst"
3110        addr = 'person@dom.ain'
3111        # An object without a header_encode method:
3112        bad_charset = object()
3113        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
3114            bad_charset)
3115
3116    def test_unicode_address_raises_error(self):
3117        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3118        addr = 'pers\u00f6n@dom.in'
3119        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
3120        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
3121
3122    def test_name_with_dot(self):
3123        x = 'John X. Doe <jxd@example.com>'
3124        y = '"John X. Doe" <jxd@example.com>'
3125        a, b = ('John X. Doe', 'jxd@example.com')
3126        self.assertEqual(utils.parseaddr(x), (a, b))
3127        self.assertEqual(utils.parseaddr(y), (a, b))
3128        # formataddr() quotes the name if there's a dot in it
3129        self.assertEqual(utils.formataddr((a, b)), y)
3130
3131    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3132        # issue 10005.  Note that in the third test the second pair of
3133        # backslashes is not actually a quoted pair because it is not inside a
3134        # comment or quoted string: the address being parsed has a quoted
3135        # string containing a quoted backslash, followed by 'example' and two
3136        # backslashes, followed by another quoted string containing a space and
3137        # the word 'example'.  parseaddr copies those two backslashes
3138        # literally.  Per rfc5322 this is not technically correct since a \ may
3139        # not appear in an address outside of a quoted string.  It is probably
3140        # a sensible Postel interpretation, though.
3141        eq = self.assertEqual
3142        eq(utils.parseaddr('""example" example"@example.com'),
3143          ('', '""example" example"@example.com'))
3144        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3145          ('', '"\\"example\\" example"@example.com'))
3146        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3147          ('', '"\\\\"example\\\\" example"@example.com'))
3148
3149    def test_parseaddr_preserves_spaces_in_local_part(self):
3150        # issue 9286.  A normal RFC5322 local part should not contain any
3151        # folding white space, but legacy local parts can (they are a sequence
3152        # of atoms, not dotatoms).  On the other hand we strip whitespace from
3153        # before the @ and around dots, on the assumption that the whitespace
3154        # around the punctuation is a mistake in what would otherwise be
3155        # an RFC5322 local part.  Leading whitespace is, usual, stripped as well.
3156        self.assertEqual(('', "merwok wok@xample.com"),
3157            utils.parseaddr("merwok wok@xample.com"))
3158        self.assertEqual(('', "merwok  wok@xample.com"),
3159            utils.parseaddr("merwok  wok@xample.com"))
3160        self.assertEqual(('', "merwok  wok@xample.com"),
3161            utils.parseaddr(" merwok  wok  @xample.com"))
3162        self.assertEqual(('', 'merwok"wok"  wok@xample.com'),
3163            utils.parseaddr('merwok"wok"  wok@xample.com'))
3164        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3165            utils.parseaddr('merwok. wok .  wok@xample.com'))
3166
3167    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3168        addr = ("'foo@example.com' (foo@example.com)",
3169                'foo@example.com')
3170        addrstr = ('"\'foo@example.com\' '
3171                            '(foo@example.com)" <foo@example.com>')
3172        self.assertEqual(utils.parseaddr(addrstr), addr)
3173        self.assertEqual(utils.formataddr(addr), addrstr)
3174
3175
3176    def test_multiline_from_comment(self):
3177        x = """\
3178Foo
3179\tBar <foo@example.com>"""
3180        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3181
3182    def test_quote_dump(self):
3183        self.assertEqual(
3184            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3185            r'"A Silly; Person" <person@dom.ain>')
3186
3187    def test_charset_richcomparisons(self):
3188        eq = self.assertEqual
3189        ne = self.assertNotEqual
3190        cset1 = Charset()
3191        cset2 = Charset()
3192        eq(cset1, 'us-ascii')
3193        eq(cset1, 'US-ASCII')
3194        eq(cset1, 'Us-AsCiI')
3195        eq('us-ascii', cset1)
3196        eq('US-ASCII', cset1)
3197        eq('Us-AsCiI', cset1)
3198        ne(cset1, 'usascii')
3199        ne(cset1, 'USASCII')
3200        ne(cset1, 'UsAsCiI')
3201        ne('usascii', cset1)
3202        ne('USASCII', cset1)
3203        ne('UsAsCiI', cset1)
3204        eq(cset1, cset2)
3205        eq(cset2, cset1)
3206
3207    def test_getaddresses(self):
3208        eq = self.assertEqual
3209        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3210                               'Bud Person <bperson@dom.ain>']),
3211           [('Al Person', 'aperson@dom.ain'),
3212            ('Bud Person', 'bperson@dom.ain')])
3213
3214    def test_getaddresses_nasty(self):
3215        eq = self.assertEqual
3216        eq(utils.getaddresses(['foo: ;']), [('', '')])
3217        eq(utils.getaddresses(
3218           ['[]*-- =~$']),
3219           [('', ''), ('', ''), ('', '*--')])
3220        eq(utils.getaddresses(
3221           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3222           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3223
3224    def test_getaddresses_embedded_comment(self):
3225        """Test proper handling of a nested comment"""
3226        eq = self.assertEqual
3227        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3228        eq(addrs[0][1], 'foo@bar.com')
3229
def test_make_msgid_collisions(self):
    # Test make_msgid uniqueness, even with multiple threads
    class MsgidsThread(Thread):
        def run(self):
            # generate msgids for 3 seconds
            self.msgids = []
            # Bind the hot-loop lookups to locals once.
            append = self.msgids.append
            make_msgid = utils.make_msgid
            clock = time.monotonic
            tfin = clock() + 3.0
            while clock() < tfin:
                append(make_msgid(domain='testdomain-string'))

    threads = [MsgidsThread() for i in range(5)]
    with start_threads(threads):
        pass
    # Flatten the per-thread lists in linear time; sum(lists, []) copies
    # the growing accumulator on every addition.
    all_ids = list(chain.from_iterable(t.msgids for t in threads))
    self.assertEqual(len(set(all_ids)), len(all_ids))
3248
3249    def test_utils_quote_unquote(self):
3250        eq = self.assertEqual
3251        msg = Message()
3252        msg.add_header('content-disposition', 'attachment',
3253                       filename='foo\\wacky"name')
3254        eq(msg.get_filename(), 'foo\\wacky"name')
3255
3256    def test_get_body_encoding_with_bogus_charset(self):
3257        charset = Charset('not a charset')
3258        self.assertEqual(charset.get_body_encoding(), 'base64')
3259
3260    def test_get_body_encoding_with_uppercase_charset(self):
3261        eq = self.assertEqual
3262        msg = Message()
3263        msg['Content-Type'] = 'text/plain; charset=UTF-8'
3264        eq(msg['content-type'], 'text/plain; charset=UTF-8')
3265        charsets = msg.get_charsets()
3266        eq(len(charsets), 1)
3267        eq(charsets[0], 'utf-8')
3268        charset = Charset(charsets[0])
3269        eq(charset.get_body_encoding(), 'base64')
3270        msg.set_payload(b'hello world', charset=charset)
3271        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3272        eq(msg.get_payload(decode=True), b'hello world')
3273        eq(msg['content-transfer-encoding'], 'base64')
3274        # Try another one
3275        msg = Message()
3276        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3277        charsets = msg.get_charsets()
3278        eq(len(charsets), 1)
3279        eq(charsets[0], 'us-ascii')
3280        charset = Charset(charsets[0])
3281        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3282        msg.set_payload('hello world', charset=charset)
3283        eq(msg.get_payload(), 'hello world')
3284        eq(msg['content-transfer-encoding'], '7bit')
3285
3286    def test_charsets_case_insensitive(self):
3287        lc = Charset('us-ascii')
3288        uc = Charset('US-ASCII')
3289        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3290
def test_partial_falls_inside_message_delivery_status(self):
    eq = self.ndiffAssertEqual
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    msg = self._msgobj('msg_43.txt')
    sfp = StringIO()
    iterators._structure(msg, sfp)
    # Expected structure dump: the delivery-status part holds 26
    # text/plain sub-parts, each indented by eight spaces.
    expected = ''.join([
        'multipart/report\n',
        '    text/plain\n',
        '    message/delivery-status\n',
        '        text/plain\n' * 26,
        '    text/rfc822-headers\n',
    ])
    eq(sfp.getvalue(), expected)
3332
3333    def test_make_msgid_domain(self):
3334        self.assertEqual(
3335            email.utils.make_msgid(domain='testdomain-string')[-19:],
3336            '@testdomain-string>')
3337
3338    def test_make_msgid_idstring(self):
3339        self.assertEqual(
3340            email.utils.make_msgid(idstring='test-idstring',
3341                domain='testdomain-string')[-33:],
3342            '.test-idstring@testdomain-string>')
3343
3344    def test_make_msgid_default_domain(self):
3345        with patch('socket.getfqdn') as mock_getfqdn:
3346            mock_getfqdn.return_value = domain = 'pythontest.example.com'
3347            self.assertTrue(
3348                email.utils.make_msgid().endswith(
3349                    '@' + domain + '>'))
3350
def test_Generator_linend(self):
    # Issue 14645.
    # newline='\n' stops the file object from translating the CRLFs
    # stored in the sample message.
    with openfile('msg_26.txt', newline='\n') as f:
        msgtxt = f.read()
    msg = email.message_from_string(msgtxt)
    out = StringIO()
    email.generator.Generator(out).flatten(msg)
    # Flattening without an explicit linesep is expected to emit '\n'.
    self.assertEqual(out.getvalue(), msgtxt.replace('\r\n', '\n'))
3361
def test_BytesGenerator_linend(self):
    # Issue 14645.
    with openfile('msg_26.txt', newline='\n') as f:
        msgtxt = f.read()
    # Parse an LF-only copy, then ask for CRLF on output: the result must
    # match the original CRLF text.
    msg = email.message_from_string(msgtxt.replace('\r\n', '\n'))
    out = BytesIO()
    email.generator.BytesGenerator(out).flatten(msg, linesep='\r\n')
    self.assertEqual(out.getvalue().decode('ascii'), msgtxt)
3372
def test_BytesGenerator_linend_with_non_ascii(self):
    # Issue 14645.
    with openfile('msg_26.txt', 'rb') as f:
        msgtxt = f.read()
    # Plant a non-ASCII byte in the body before doing the round trip.
    msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
    msg = email.message_from_bytes(msgtxt.replace(b'\r\n', b'\n'))
    out = BytesIO()
    email.generator.BytesGenerator(out).flatten(msg, linesep='\r\n')
    self.assertEqual(out.getvalue(), msgtxt)
3384
def test_mime_classes_policy_argument(self):
    # Every MIME convenience class must default to the compat32 policy
    # and honour an explicit policy= keyword.
    with openfile('audiotest.au', 'rb') as fp:
        audiodata = fp.read()
    with openfile('PyBanner048.gif', 'rb') as fp:
        bindata = fp.read()
    # (class, positional constructor arguments)
    classes = [
        (MIMEApplication, ('',)),
        (MIMEAudio, (audiodata,)),
        (MIMEImage, (bindata,)),
        (MIMEMessage, (Message(),)),
        (MIMENonMultipart, ('multipart', 'mixed')),
        (MIMEText, ('',)),
    ]
    for cls, args in classes:
        with self.subTest(cls=cls.__name__, policy='compat32'):
            part = cls(*args)
            self.assertIs(part.policy, email.policy.compat32)
        with self.subTest(cls=cls.__name__, policy='default'):
            part = cls(*args, policy=email.policy.default)
            self.assertIs(part.policy, email.policy.default)
3405
3406
# Test the iterator/generators
class TestIterators(TestEmailBase):
    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        # msg_19.txt holds the expected concatenation of the body lines.
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Only the maintype is given, so every text/* subpart is visited.
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Both maintype and subtype are given explicitly here.
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')
        ]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # (pushed chunk, number of complete lines readable afterwards)
        imt = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        for chunk, expected_count in imt:
            bsf.push(chunk)
            # Drain every line that is complete after this push.
            ready = list(iter(bsf.readline, NeedMoreData))
            self.assertEqual(expected_count, len(ready))
            om.extend(ready)
        self.assertEqual(len(om), sum(n for _, n in imt))
        # Nothing may be lost or invented: output must re-join to the input.
        self.assertEqual(''.join([chunk for chunk, _ in imt]), ''.join(om))

    def test_push_random(self):
        from email.feedparser import BufferedSubFile, NeedMoreData

        n = 10000
        chunksize = 5
        chars = 'abcd \t\r\n'

        # Random text, terminated so that the last line is complete.
        s = ''.join(choice(chars) for i in range(n)) + '\n'
        target = s.splitlines(True)

        bsf = BufferedSubFile()
        lines = []
        # Feed fixed-size chunks, which cut lines (and CRLF pairs) at
        # arbitrary positions.
        for start in range(0, len(s), chunksize):
            bsf.push(s[start:start + chunksize])
            lines.extend(iter(bsf.readline, NeedMoreData))
        self.assertEqual(lines, target)
3509
3510
class TestFeedParsers(TestEmailBase):

    def parse(self, chunks):
        # Feed every item of `chunks` to a fresh FeedParser and return the
        # resulting message object.
        feedparser = FeedParser()
        for chunk in chunks:
            feedparser.feed(chunk)
        return feedparser.close()

    def test_empty_header_name_handled(self):
        # Issue 19996
        # NOTE: a plain string is passed, so parse() feeds it one character
        # at a time.
        msg = self.parse("First: val\n: bad\nSecond: val")
        self.assertEqual(msg['First'], 'val')
        self.assertEqual(msg['Second'], 'val')

    def test_newlines(self):
        # (chunks, expected header names): CR, LF and CRLF all end a
        # header line, even when a terminator is split across two chunks.
        key_cases = [
            (['a:\nb:\rc:\r\nd:\n'], ['a', 'b', 'c', 'd']),
            (['a:\nb:\rc:\r\nd:'], ['a', 'b', 'c', 'd']),
            (['a:\rb', 'c:\n'], ['a', 'bc']),
            (['a:\r', 'b:\n'], ['a', 'b']),
            (['a:\r', '\nb:\n'], ['a', 'b']),
        ]
        for chunks, expected in key_cases:
            with self.subTest(chunks=chunks):
                self.assertEqual(self.parse(chunks).keys(), expected)

        # Only CR and LF should break header fields
        item_cases = [
            (['a:\x85b:\u2028c:\n'], [('a', '\x85b:\u2028c:')]),
            (['a:\r', 'b:\x85', 'c:\n'], [('a', ''), ('b', '\x85c:')]),
        ]
        for chunks, expected in item_cases:
            with self.subTest(chunks=chunks):
                self.assertEqual(self.parse(chunks).items(), expected)

    def test_long_lines(self):
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        M, N = 1000, 20000
        # Body made of N chunks without any line ending.
        m = self.parse(['a:b\n\n'] + ['x'*M] * N)
        self.assertEqual(m.items(), [('a', 'b')])
        self.assertEqual(m.get_payload(), 'x'*M*N)
        # Same, with CR used as the line terminator in the header block.
        m = self.parse(['a:b\r\r'] + ['x'*M] * N)
        self.assertEqual(m.items(), [('a', 'b')])
        self.assertEqual(m.get_payload(), 'x'*M*N)
        # '\x85' inside the body must not be treated as a line break.
        m = self.parse(['a:b\r\r'] + ['x'*M+'\x85'] * N)
        self.assertEqual(m.items(), [('a', 'b')])
        self.assertEqual(m.get_payload(), ('x'*M+'\x85')*N)
        # A single very long header value spread over many chunks.
        m = self.parse(['a:\r', 'b: '] + ['x'*M] * N)
        self.assertEqual(m.items(), [('a', ''), ('b', 'x'*M*N)])
3557
3558
class TestParsers(TestEmailBase):

    # Never truncate the diff shown when a comparison fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed, as one string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_bytes_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'rb') as fp:
            email.parser.BytesParser().parse(fp)
            self.assertFalse(fp.closed)

    def test_bytes_parser_on_exception_does_not_close_file(self):
        with openfile('msg_15.txt', 'rb') as fp:
            strict_parser = email.parser.BytesParser(
                policy=email.policy.strict)
            # Under the strict policy the defect is raised as an exception.
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'r') as fp:
            email.parser.Parser().parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_on_exception_does_not_close_file(self):
        with openfile('msg_15.txt', 'r') as fp:
            strict_parser = email.parser.Parser(policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLFs of the input file intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        out = StringIO()
        Generator(out).flatten(msg, linesep='\r\n')
        self.assertEqual(out.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # The From: line is deliberately missing its closing '>'.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Unusual characters in the field names are still accepted.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line has a space before any colon, so it is not a
        # header; no headers at all are expected in the result.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        self.assertEqual(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
3759
3760
class Test8BitBytesHandling(TestEmailBase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    # Message template; the tests fill in {charset}, {cte} and {bodyline}
    # with str.format() before encoding the text to bytes.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)
3779
3780    def test_known_8bit_CTE(self):
3781        m = self.bodytest_msg.format(charset='utf-8',
3782                                     cte='8bit',
3783                                     bodyline='pöstal').encode('utf-8')
3784        msg = email.message_from_bytes(m)
3785        self.assertEqual(msg.get_payload(), "pöstal\n")
3786        self.assertEqual(msg.get_payload(decode=True),
3787                         "pöstal\n".encode('utf-8'))
3788
3789    def test_unknown_8bit_CTE(self):
3790        m = self.bodytest_msg.format(charset='notavalidcharset',
3791                                     cte='8bit',
3792                                     bodyline='pöstal').encode('utf-8')
3793        msg = email.message_from_bytes(m)
3794        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
3795        self.assertEqual(msg.get_payload(decode=True),
3796                         "pöstal\n".encode('utf-8'))
3797
3798    def test_8bit_in_quopri_body(self):
3799        # This is non-RFC compliant data...without 'decode' the library code
3800        # decodes the body using the charset from the headers, and because the
3801        # source byte really is utf-8 this works.  This is likely to fail
3802        # against real dirty data (ie: produce mojibake), but the data is
3803        # invalid anyway so it is as good a guess as any.  But this means that
3804        # this test just confirms the current behavior; that behavior is not
3805        # necessarily the best possible behavior.  With 'decode' it is
3806        # returning the raw bytes, so that test should be of correct behavior,
3807        # or at least produce the same result that email4 did.
3808        m = self.bodytest_msg.format(charset='utf-8',
3809                                     cte='quoted-printable',
3810                                     bodyline='p=C3=B6stál').encode('utf-8')
3811        msg = email.message_from_bytes(m)
3812        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3813        self.assertEqual(msg.get_payload(decode=True),
3814                         'pöstál\n'.encode('utf-8'))
3815
3816    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3817        # This is similar to the previous test, but proves that if the 8bit
3818        # byte is undecodeable in the specified charset, it gets replaced
3819        # by the unicode 'unknown' character.  Again, this may or may not
3820        # be the ideal behavior.  Note that if decode=False none of the
3821        # decoders will get involved, so this is the only test we need
3822        # for this behavior.
3823        m = self.bodytest_msg.format(charset='ascii',
3824                                     cte='quoted-printable',
3825                                     bodyline='p=C3=B6stál').encode('utf-8')
3826        msg = email.message_from_bytes(m)
3827        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
3828        self.assertEqual(msg.get_payload(decode=True),
3829                        'pöstál\n'.encode('utf-8'))
3830
3831    # test_defect_handling:test_invalid_chars_in_base64_payload
3832    def test_8bit_in_base64_body(self):
3833        # If we get 8bit bytes in a base64 body, we can just ignore them
3834        # as being outside the base64 alphabet and decode anyway.  But
3835        # we register a defect.
3836        m = self.bodytest_msg.format(charset='utf-8',
3837                                     cte='base64',
3838                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
3839        msg = email.message_from_bytes(m)
3840        self.assertEqual(msg.get_payload(decode=True),
3841                         'pöstal'.encode('utf-8'))
3842        self.assertIsInstance(msg.defects[0],
3843                              errors.InvalidBase64CharactersDefect)
3844
3845    def test_8bit_in_uuencode_body(self):
3846        # Sticking an 8bit byte in a uuencode block makes it undecodable by
3847        # normal means, so the block is returned undecoded, but as bytes.
3848        m = self.bodytest_msg.format(charset='utf-8',
3849                                     cte='uuencode',
3850                                     bodyline='<,.V<W1A; á ').encode('utf-8')
3851        msg = email.message_from_bytes(m)
3852        self.assertEqual(msg.get_payload(decode=True),
3853                         '<,.V<W1A; á \n'.encode('utf-8'))
3854
3855
3856    headertest_headers = (
3857        ('From: foo@bar.com', ('From', 'foo@bar.com')),
3858        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3859        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3860            '\tJean de Baddie',
3861            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3862                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3863                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3864        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3865        )
3866    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3867        '\nYes, they are flying.\n').encode('utf-8')
3868
3869    def test_get_8bit_header(self):
3870        msg = email.message_from_bytes(self.headertest_msg)
3871        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3872        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
3873
3874    def test_print_8bit_headers(self):
3875        msg = email.message_from_bytes(self.headertest_msg)
3876        self.assertEqual(str(msg),
3877                         textwrap.dedent("""\
3878                            From: {}
3879                            To: {}
3880                            Subject: {}
3881                            From: {}
3882
3883                            Yes, they are flying.
3884                            """).format(*[expected[1] for (_, expected) in
3885                                        self.headertest_headers]))
3886
3887    def test_values_with_8bit_headers(self):
3888        msg = email.message_from_bytes(self.headertest_msg)
3889        self.assertListEqual([str(x) for x in msg.values()],
3890                              ['foo@bar.com',
3891                               'b\uFFFD\uFFFDz',
3892                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3893                                   'coll\uFFFD\uFFFDgue, le pouf '
3894                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3895                                   '\tJean de Baddie',
3896                               "g\uFFFD\uFFFDst"])
3897
3898    def test_items_with_8bit_headers(self):
3899        msg = email.message_from_bytes(self.headertest_msg)
3900        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
3901                              [('From', 'foo@bar.com'),
3902                               ('To', 'b\uFFFD\uFFFDz'),
3903                               ('Subject', 'Maintenant je vous '
3904                                  'pr\uFFFD\uFFFDsente '
3905                                  'mon coll\uFFFD\uFFFDgue, le pouf '
3906                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3907                                  '\tJean de Baddie'),
3908                               ('From', 'g\uFFFD\uFFFDst')])
3909
3910    def test_get_all_with_8bit_headers(self):
3911        msg = email.message_from_bytes(self.headertest_msg)
3912        self.assertListEqual([str(x) for x in msg.get_all('from')],
3913                              ['foo@bar.com',
3914                               'g\uFFFD\uFFFDst'])
3915
3916    def test_get_content_type_with_8bit(self):
3917        msg = email.message_from_bytes(textwrap.dedent("""\
3918            Content-Type: text/pl\xA7in; charset=utf-8
3919            """).encode('latin-1'))
3920        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3921        self.assertEqual(msg.get_content_maintype(), "text")
3922        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3923
3924    # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
3925    def test_get_params_with_8bit(self):
3926        msg = email.message_from_bytes(
3927            'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3928        self.assertEqual(msg.get_params(header='x-header'),
3929           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3930        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3931        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3932        self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3933
3934    # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
3935    def test_get_rfc2231_params_with_8bit(self):
3936        msg = email.message_from_bytes(textwrap.dedent("""\
3937            Content-Type: text/plain; charset=us-ascii;
3938             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3939             ).encode('latin-1'))
3940        self.assertEqual(msg.get_param('title'),
3941            ('us-ascii', 'en', 'This is not f\uFFFDn'))
3942
3943    def test_set_rfc2231_params_with_8bit(self):
3944        msg = email.message_from_bytes(textwrap.dedent("""\
3945            Content-Type: text/plain; charset=us-ascii;
3946             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3947             ).encode('latin-1'))
3948        msg.set_param('title', 'test')
3949        self.assertEqual(msg.get_param('title'), 'test')
3950
3951    def test_del_rfc2231_params_with_8bit(self):
3952        msg = email.message_from_bytes(textwrap.dedent("""\
3953            Content-Type: text/plain; charset=us-ascii;
3954             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3955             ).encode('latin-1'))
3956        msg.del_param('title')
3957        self.assertEqual(msg.get_param('title'), None)
3958        self.assertEqual(msg.get_content_maintype(), 'text')
3959
3960    def test_get_payload_with_8bit_cte_header(self):
3961        msg = email.message_from_bytes(textwrap.dedent("""\
3962            Content-Transfer-Encoding: b\xa7se64
3963            Content-Type: text/plain; charset=latin-1
3964
3965            payload
3966            """).encode('latin-1'))
3967        self.assertEqual(msg.get_payload(), 'payload\n')
3968        self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3969
3970    non_latin_bin_msg = textwrap.dedent("""\
3971        From: foo@bar.com
3972        To: báz
3973        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3974        \tJean de Baddie
3975        Mime-Version: 1.0
3976        Content-Type: text/plain; charset="utf-8"
3977        Content-Transfer-Encoding: 8bit
3978
3979        Да, они летят.
3980        """).encode('utf-8')
3981
3982    def test_bytes_generator(self):
3983        msg = email.message_from_bytes(self.non_latin_bin_msg)
3984        out = BytesIO()
3985        email.generator.BytesGenerator(out).flatten(msg)
3986        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3987
3988    def test_bytes_generator_handles_None_body(self):
3989        #Issue 11019
3990        msg = email.message.Message()
3991        out = BytesIO()
3992        email.generator.BytesGenerator(out).flatten(msg)
3993        self.assertEqual(out.getvalue(), b"\n")
3994
3995    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
3996        From: foo@bar.com
3997        To: =?unknown-8bit?q?b=C3=A1z?=
3998        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3999         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
4000         =?unknown-8bit?q?_Jean_de_Baddie?=
4001        Mime-Version: 1.0
4002        Content-Type: text/plain; charset="utf-8"
4003        Content-Transfer-Encoding: base64
4004
4005        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
4006        """)
4007
4008    def test_generator_handles_8bit(self):
4009        msg = email.message_from_bytes(self.non_latin_bin_msg)
4010        out = StringIO()
4011        email.generator.Generator(out).flatten(msg)
4012        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
4013
4014    def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
4015        msg = email.message_from_bytes(self.non_latin_bin_msg)
4016        out = BytesIO()
4017        BytesGenerator(out).flatten(msg)
4018        orig_value = out.getvalue()
4019        Generator(StringIO()).flatten(msg) # Should not mutate msg!
4020        out = BytesIO()
4021        BytesGenerator(out).flatten(msg)
4022        self.assertEqual(out.getvalue(), orig_value)
4023
4024    def test_bytes_generator_with_unix_from(self):
4025        # The unixfrom contains a current date, so we can't check it
4026        # literally.  Just make sure the first word is 'From' and the
4027        # rest of the message matches the input.
4028        msg = email.message_from_bytes(self.non_latin_bin_msg)
4029        out = BytesIO()
4030        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
4031        lines = out.getvalue().split(b'\n')
4032        self.assertEqual(lines[0].split()[0], b'From')
4033        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
4034
4035    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
4036    non_latin_bin_msg_as7bit[2:4] = [
4037        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
4038         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
4039    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
4040
4041    def test_message_from_binary_file(self):
4042        fn = 'test.msg'
4043        self.addCleanup(unlink, fn)
4044        with open(fn, 'wb') as testfile:
4045            testfile.write(self.non_latin_bin_msg)
4046        with open(fn, 'rb') as testfile:
4047            m = email.parser.BytesParser().parse(testfile)
4048        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
4049
4050    latin_bin_msg = textwrap.dedent("""\
4051        From: foo@bar.com
4052        To: Dinsdale
4053        Subject: Nudge nudge, wink, wink
4054        Mime-Version: 1.0
4055        Content-Type: text/plain; charset="latin-1"
4056        Content-Transfer-Encoding: 8bit
4057
4058        oh là là, know what I mean, know what I mean?
4059        """).encode('latin-1')
4060
4061    latin_bin_msg_as7bit = textwrap.dedent("""\
4062        From: foo@bar.com
4063        To: Dinsdale
4064        Subject: Nudge nudge, wink, wink
4065        Mime-Version: 1.0
4066        Content-Type: text/plain; charset="iso-8859-1"
4067        Content-Transfer-Encoding: quoted-printable
4068
4069        oh l=E0 l=E0, know what I mean, know what I mean?
4070        """)
4071
4072    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
4073        m = email.message_from_bytes(self.latin_bin_msg)
4074        self.assertEqual(str(m), self.latin_bin_msg_as7bit)
4075
4076    def test_decoded_generator_emits_unicode_body(self):
4077        m = email.message_from_bytes(self.latin_bin_msg)
4078        out = StringIO()
4079        email.generator.DecodedGenerator(out).flatten(m)
4080        #DecodedHeader output contains an extra blank line compared
4081        #to the input message.  RDM: not sure if this is a bug or not,
4082        #but it is not specific to the 8bit->7bit conversion.
4083        self.assertEqual(out.getvalue(),
4084            self.latin_bin_msg.decode('latin-1')+'\n')
4085
4086    def test_bytes_feedparser(self):
4087        bfp = email.feedparser.BytesFeedParser()
4088        for i in range(0, len(self.latin_bin_msg), 10):
4089            bfp.feed(self.latin_bin_msg[i:i+10])
4090        m = bfp.close()
4091        self.assertEqual(str(m), self.latin_bin_msg_as7bit)
4092
4093    def test_crlf_flatten(self):
4094        with openfile('msg_26.txt', 'rb') as fp:
4095            text = fp.read()
4096        msg = email.message_from_bytes(text)
4097        s = BytesIO()
4098        g = email.generator.BytesGenerator(s)
4099        g.flatten(msg, linesep='\r\n')
4100        self.assertEqual(s.getvalue(), text)
4101
4102    def test_8bit_multipart(self):
4103        # Issue 11605
4104        source = textwrap.dedent("""\
4105            Date: Fri, 18 Mar 2011 17:15:43 +0100
4106            To: foo@example.com
4107            From: foodwatch-Newsletter <bar@example.com>
4108            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
4109            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
4110            MIME-Version: 1.0
4111            Content-Type: multipart/alternative;
4112                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
4113
4114            --b1_76a486bee62b0d200f33dc2ca08220ad
4115            Content-Type: text/plain; charset="utf-8"
4116            Content-Transfer-Encoding: 8bit
4117
4118            Guten Tag, ,
4119
4120            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
4121            Nachrichten aus Japan.
4122
4123
4124            --b1_76a486bee62b0d200f33dc2ca08220ad
4125            Content-Type: text/html; charset="utf-8"
4126            Content-Transfer-Encoding: 8bit
4127
4128            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
4129                "http://www.w3.org/TR/html4/loose.dtd">
4130            <html lang="de">
4131            <head>
4132                    <title>foodwatch - Newsletter</title>
4133            </head>
4134            <body>
4135              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
4136                 die Nachrichten aus Japan.</p>
4137            </body>
4138            </html>
4139            --b1_76a486bee62b0d200f33dc2ca08220ad--
4140
4141            """).encode('utf-8')
4142        msg = email.message_from_bytes(source)
4143        s = BytesIO()
4144        g = email.generator.BytesGenerator(s)
4145        g.flatten(msg)
4146        self.assertEqual(s.getvalue(), source)
4147
4148    def test_bytes_generator_b_encoding_linesep(self):
4149        # Issue 14062: b encoding was tacking on an extra \n.
4150        m = Message()
4151        # This has enough non-ascii that it should always end up b encoded.
4152        m['Subject'] = Header('žluťoučký kůň')
4153        s = BytesIO()
4154        g = email.generator.BytesGenerator(s)
4155        g.flatten(m, linesep='\r\n')
4156        self.assertEqual(
4157            s.getvalue(),
4158            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
4159
4160    def test_generator_b_encoding_linesep(self):
4161        # Since this broke in ByteGenerator, test Generator for completeness.
4162        m = Message()
4163        # This has enough non-ascii that it should always end up b encoded.
4164        m['Subject'] = Header('žluťoučký kůň')
4165        s = StringIO()
4166        g = email.generator.Generator(s)
4167        g.flatten(m, linesep='\r\n')
4168        self.assertEqual(
4169            s.getvalue(),
4170            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
4171
4172    maxDiff = None
4173
4174
class BaseTestBytesGeneratorIdempotent:
    # Mixin providing bytes-based _msgobj/_idempotent helpers.  It reads
    # self.linesep, self.blinesep and self.normalize_linesep_regex, which
    # this class does not define itself.

    maxDiff = None

    def _msgobj(self, filename):
        # Load the test file as bytes, rewrite its line endings to blinesep,
        # and hand back both the parsed message and the normalized bytes.
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with maxheaderlen=0 and this class's linesep, then
        # require the generated bytes to equal data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
4191
4192
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    # LF flavour: every CRLF in the test data is matched and replaced by a
    # bare LF, and messages are flattened with '\n' as the line separator.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
4198
4199
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour: every LF not already preceded by CR is matched and
    # replaced by CRLF, and messages are flattened with '\r\n' as the line
    # separator.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
4205
4206
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        # Expected lengths for 0..14 input characters: four output
        # characters for every started group of three.
        for size in range(15):
            started_groups = (size + 2) // 3
            check(base64mime.header_length('x' * size), 4 * started_groups)

    def test_decode(self):
        cases = (
            ('', b''),
            ('aGVsbG8=', b'hello'),
        )
        for encoded, expected in cases:
            self.assertEqual(base64mime.decode(encoded), expected)

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Input that itself ends with a newline
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # The 100-byte input wrapped at 40 characters: three full lines
        # followed by a short final one.
        payload = b'xxxx ' * 20
        wrapped = ['eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'] * 3
        wrapped.append('eHh4eCB4eHh4IA==')
        # Test the maxlinelen arg (default eol)
        check(encode(payload, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        check(encode(payload, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        # Each case is (text, extra keyword arguments, expected encoded-word).
        cases = [
            ('hello', {}, '=?iso-8859-1?b?aGVsbG8=?='),
            ('hello\r\nworld', {}, '=?iso-8859-1?b?aGVsbG8NCndvcmxk?='),
            ('hello\nworld', {}, '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
            # Test the charset option
            ('hello', {'charset': 'iso-8859-2'}, '=?iso-8859-2?b?aGVsbG8=?='),
            ('hello\nworld', {}, '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
        ]
        for text, options, expected in cases:
            self.assertEqual(base64mime.header_encode(text, **options),
                             expected)


4257
4258
4259
4260class TestQuopri(unittest.TestCase):
4261    def setUp(self):
4262        # Set of characters (as byte integers) that don't need to be encoded
4263        # in headers.
4264        self.hlit = list(chain(
4265            range(ord('a'), ord('z') + 1),
4266            range(ord('A'), ord('Z') + 1),
4267            range(ord('0'), ord('9') + 1),
4268            (c for c in b'!*+-/')))
4269        # Set of characters (as byte integers) that do need to be encoded in
4270        # headers.
4271        self.hnon = [c for c in range(256) if c not in self.hlit]
4272        assert len(self.hlit) + len(self.hnon) == 256
4273        # Set of characters (as byte integers) that don't need to be encoded
4274        # in bodies.
4275        self.blit = list(range(ord(' '), ord('~') + 1))
4276        self.blit.append(ord('\t'))
4277        self.blit.remove(ord('='))
4278        # Set of characters (as byte integers) that do need to be encoded in
4279        # bodies.
4280        self.bnon = [c for c in range(256) if c not in self.blit]
4281        assert len(self.blit) + len(self.bnon) == 256
4282
4283    def test_quopri_header_check(self):
4284        for c in self.hlit:
4285            self.assertFalse(quoprimime.header_check(c),
4286                        'Should not be header quopri encoded: %s' % chr(c))
4287        for c in self.hnon:
4288            self.assertTrue(quoprimime.header_check(c),
4289                            'Should be header quopri encoded: %s' % chr(c))
4290
4291    def test_quopri_body_check(self):
4292        for c in self.blit:
4293            self.assertFalse(quoprimime.body_check(c),
4294                        'Should not be body quopri encoded: %s' % chr(c))
4295        for c in self.bnon:
4296            self.assertTrue(quoprimime.body_check(c),
4297                            'Should be body quopri encoded: %s' % chr(c))
4298
4299    def test_header_quopri_len(self):
4300        eq = self.assertEqual
4301        eq(quoprimime.header_length(b'hello'), 5)
4302        # RFC 2047 chrome is not included in header_length().
4303        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
4304           quoprimime.header_length(b'hello') +
4305           # =?xxx?q?...?= means 10 extra characters
4306           10)
4307        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
4308        # RFC 2047 chrome is not included in header_length().
4309        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
4310           quoprimime.header_length(b'h@e@l@l@o@') +
4311           # =?xxx?q?...?= means 10 extra characters
4312           10)
4313        for c in self.hlit:
4314            eq(quoprimime.header_length(bytes([c])), 1,
4315               'expected length 1 for %r' % chr(c))
4316        for c in self.hnon:
4317            # Space is special; it's encoded to _
4318            if c == ord(' '):
4319                continue
4320            eq(quoprimime.header_length(bytes([c])), 3,
4321               'expected length 3 for %r' % chr(c))
4322        eq(quoprimime.header_length(b' '), 1)
4323
4324    def test_body_quopri_len(self):
4325        eq = self.assertEqual
4326        for c in self.blit:
4327            eq(quoprimime.body_length(bytes([c])), 1)
4328        for c in self.bnon:
4329            eq(quoprimime.body_length(bytes([c])), 3)
4330
4331    def test_quote_unquote_idempotent(self):
4332        for x in range(256):
4333            c = chr(x)
4334            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
4335
4336    def _test_header_encode(self, header, expected_encoded_header, charset=None):
4337        if charset is None:
4338            encoded_header = quoprimime.header_encode(header)
4339        else:
4340            encoded_header = quoprimime.header_encode(header, charset)
4341        self.assertEqual(encoded_header, expected_encoded_header)
4342
4343    def test_header_encode_null(self):
4344        self._test_header_encode(b'', '')
4345
4346    def test_header_encode_one_word(self):
4347        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
4348
4349    def test_header_encode_two_lines(self):
4350        self._test_header_encode(b'hello\nworld',
4351                                '=?iso-8859-1?q?hello=0Aworld?=')
4352
4353    def test_header_encode_non_ascii(self):
4354        self._test_header_encode(b'hello\xc7there',
4355                                '=?iso-8859-1?q?hello=C7there?=')
4356
4357    def test_header_encode_alt_charset(self):
4358        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
4359                charset='iso-8859-2')
4360
4361    def _test_header_decode(self, encoded_header, expected_decoded_header):
4362        decoded_header = quoprimime.header_decode(encoded_header)
4363        self.assertEqual(decoded_header, expected_decoded_header)
4364
4365    def test_header_decode_null(self):
4366        self._test_header_decode('', '')
4367
4368    def test_header_decode_one_word(self):
4369        self._test_header_decode('hello', 'hello')
4370
4371    def test_header_decode_two_lines(self):
4372        self._test_header_decode('hello=0Aworld', 'hello\nworld')
4373
4374    def test_header_decode_non_ascii(self):
4375        self._test_header_decode('hello=C7there', 'hello\xc7there')
4376
4377    def test_header_decode_re_bug_18380(self):
4378        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
4379        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)
4380
4381    def _test_decode(self, encoded, expected_decoded, eol=None):
4382        if eol is None:
4383            decoded = quoprimime.decode(encoded)
4384        else:
4385            decoded = quoprimime.decode(encoded, eol=eol)
4386        self.assertEqual(decoded, expected_decoded)
4387
4388    def test_decode_null_word(self):
4389        self._test_decode('', '')
4390
4391    def test_decode_null_line_null_word(self):
4392        self._test_decode('\r\n', '\n')
4393
4394    def test_decode_one_word(self):
4395        self._test_decode('hello', 'hello')
4396
4397    def test_decode_one_word_eol(self):
4398        self._test_decode('hello', 'hello', eol='X')
4399
4400    def test_decode_one_line(self):
4401        self._test_decode('hello\r\n', 'hello\n')
4402
4403    def test_decode_one_line_lf(self):
4404        self._test_decode('hello\n', 'hello\n')
4405
4406    def test_decode_one_line_cr(self):
4407        self._test_decode('hello\r', 'hello\n')
4408
4409    def test_decode_one_line_nl(self):
4410        self._test_decode('hello\n', 'helloX', eol='X')
4411
4412    def test_decode_one_line_crnl(self):
4413        self._test_decode('hello\r\n', 'helloX', eol='X')
4414
4415    def test_decode_one_line_one_word(self):
4416        self._test_decode('hello\r\nworld', 'hello\nworld')
4417
4418    def test_decode_one_line_one_word_eol(self):
4419        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
4420
4421    def test_decode_two_lines(self):
4422        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
4423
4424    def test_decode_two_lines_eol(self):
4425        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
4426
4427    def test_decode_one_long_line(self):
4428        self._test_decode('Spam' * 250, 'Spam' * 250)
4429
4430    def test_decode_one_space(self):
4431        self._test_decode(' ', '')
4432
4433    def test_decode_multiple_spaces(self):
4434        self._test_decode(' ' * 5, '')
4435
4436    def test_decode_one_line_trailing_spaces(self):
4437        self._test_decode('hello    \r\n', 'hello\n')
4438
4439    def test_decode_two_lines_trailing_spaces(self):
4440        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')
4441
4442    def test_decode_quoted_word(self):
4443        self._test_decode('=22quoted=20words=22', '"quoted words"')
4444
4445    def test_decode_uppercase_quoting(self):
4446        self._test_decode('ab=CD=EF', 'ab\xcd\xef')
4447
4448    def test_decode_lowercase_quoting(self):
4449        self._test_decode('ab=cd=ef', 'ab\xcd\xef')
4450
4451    def test_decode_soft_line_break(self):
4452        self._test_decode('soft line=\r\nbreak', 'soft linebreak')
4453
4454    def test_decode_false_quoting(self):
4455        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
4456
4457    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
4458        kwargs = {}
4459        if maxlinelen is None:
4460            # Use body_encode's default.
4461            maxlinelen = 76
4462        else:
4463            kwargs['maxlinelen'] = maxlinelen
4464        if eol is None:
4465            # Use body_encode's default.
4466            eol = '\n'
4467        else:
4468            kwargs['eol'] = eol
4469        encoded_body = quoprimime.body_encode(body, **kwargs)
4470        self.assertEqual(encoded_body, expected_encoded_body)
4471        if eol == '\n' or eol == '\r\n':
4472            # We know how to split the result back into lines, so maxlinelen
4473            # can be checked.
4474            for line in encoded_body.splitlines():
4475                self.assertLessEqual(len(line), maxlinelen)
4476
4477    def test_encode_null(self):
4478        self._test_encode('', '')
4479
4480    def test_encode_null_lines(self):
4481        self._test_encode('\n\n', '\n\n')
4482
4483    def test_encode_one_line(self):
4484        self._test_encode('hello\n', 'hello\n')
4485
4486    def test_encode_one_line_crlf(self):
4487        self._test_encode('hello\r\n', 'hello\n')
4488
4489    def test_encode_one_line_eol(self):
4490        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
4491
4492    def test_encode_one_line_eol_after_non_ascii(self):
4493        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
4494        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
4495                          'hello=CF=85\r\n', eol='\r\n')
4496
4497    def test_encode_one_space(self):
4498        self._test_encode(' ', '=20')
4499
4500    def test_encode_one_line_one_space(self):
4501        self._test_encode(' \n', '=20\n')
4502
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4507
4508    def test_encode_two_lines_one_space(self):
4509        self._test_encode(' \n \n', '=20\n=20\n')
4510
4511    def test_encode_one_word_trailing_spaces(self):
4512        self._test_encode('hello   ', 'hello  =20')
4513
4514    def test_encode_one_line_trailing_spaces(self):
4515        self._test_encode('hello   \n', 'hello  =20\n')
4516
4517    def test_encode_one_word_trailing_tab(self):
4518        self._test_encode('hello  \t', 'hello  =09')
4519
4520    def test_encode_one_line_trailing_tab(self):
4521        self._test_encode('hello  \t\n', 'hello  =09\n')
4522
4523    def test_encode_trailing_space_before_maxlinelen(self):
4524        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4525
4526    def test_encode_trailing_space_at_maxlinelen(self):
4527        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4528
4529    def test_encode_trailing_space_beyond_maxlinelen(self):
4530        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4531
4532    def test_encode_whitespace_lines(self):
4533        self._test_encode(' \n' * 5, '=20\n' * 5)
4534
4535    def test_encode_quoted_equals(self):
4536        self._test_encode('a = b', 'a =3D b')
4537
4538    def test_encode_one_long_string(self):
4539        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4540
4541    def test_encode_one_long_line(self):
4542        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4543
4544    def test_encode_one_very_long_line(self):
4545        self._test_encode('x' * 200 + '\n',
4546                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4547
4548    def test_encode_shortest_maxlinelen(self):
4549        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
4550
4551    def test_encode_maxlinelen_too_small(self):
4552        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4553
4554    def test_encode(self):
4555        eq = self.assertEqual
4556        eq(quoprimime.body_encode(''), '')
4557        eq(quoprimime.body_encode('hello'), 'hello')
4558        # Test the binary flag
4559        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
4560        # Test the maxlinelen arg
4561        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
4562xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4563 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4564x xxxx xxxx xxxx xxxx=20""")
4565        # Test the eol argument
4566        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4567           """\
4568xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4569 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4570x xxxx xxxx xxxx xxxx=20""")
4571        eq(quoprimime.body_encode("""\
4572one line
4573
4574two line"""), """\
4575one line
4576
4577two line""")
4578
4579
4580
4581# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; drop it (if
        # it is there) so it does not linger in the module-level registry.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_cs = Charset('us-ascii')
        self.assertEqual(ascii_cs.header_encode('Hello World!'),
                         'Hello World!')
        # Characters outside the ASCII range cannot be encoded as us-ascii...
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_cs.header_encode(eight_bit)
        # ...but the same text yields an encoded word with utf-8.
        utf8_cs = Charset('utf-8')
        self.assertEqual(utf8_cs.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8_cs = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8_cs.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_cs = Charset('us-ascii')
        check('hello world', ascii_cs.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_cs = Charset('fake')
        check('hello world', fake_cs.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # An ASCII name is accepted and reported back by str() ...
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        # ... while a name containing a non-ASCII character is rejected.
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4636
4637
4638
4639# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        # The appended chunk starts with a space of its own, so the encoded
        # result carries two spaces between the chunks.
        hdr = Header('Hello World!')
        self.ndiffAssertEqual(hdr.encode(), 'Hello World!')
        hdr.append(' Goodbye World!')
        self.ndiffAssertEqual(hdr.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # Without leading whitespace on the appended chunk, the two chunks
        # are expected to be joined by a single space.
        hdr = Header('Hello World!')
        self.ndiffAssertEqual(hdr.encode(), 'Hello World!')
        hdr.append('Goodbye World!')
        self.ndiffAssertEqual(hdr.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        plain = 'no decoding needed'
        self.assertEqual(decode_header(plain), [(plain, None)])

    def test_long(self):
        text = ("I am the very model of a modern Major-General; "
                "I've information vegetable, animal, and mineral; "
                "I know the kings of England, and I quote the fights "
                "historical from Marathon to Waterloo, in order categorical; "
                "I'm very well acquainted, too, with matters mathematical; "
                "I understand equations, both the simple and quadratical; "
                "about binomial theorem I'm teeming with a lot o' news, "
                "with many cheerful facts about the square of the hypotenuse.")
        hdr = Header(text, maxlinelen=76)
        # Every folded line has to respect the requested maximum length.
        for line in hdr.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        check = self.ndiffAssertEqual
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        latin1_text = (b'Die Mieter treten hier ein werden mit einem '
                       b'Foerderband komfortabel den Korridor entlang, '
                       b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                       b'gegen die rotierenden Klingen bef\xf6rdert. ')
        latin2_text = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                       b'd\xf9vtipu.. ')
        utf8_text = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # One header built from three chunks, each in its own charset.
        hdr = Header(latin1_text, latin1)
        hdr.append(latin2_text, latin2)
        hdr.append(utf8_text, utf8)
        encoded = hdr.encode(maxlinelen=76)
        check(encoded, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must hand back the three original chunks.
        chunks = decode_header(encoded)
        check(len(chunks), 3)
        check(chunks[0], (latin1_text, 'iso-8859-1'))
        check(chunks[1], (latin2_text, 'iso-8859-2'))
        check(chunks[2], (utf8_text.encode('utf-8'), 'utf-8'))
        as_text = str(hdr)
        check(as_text,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        rebuilt = make_header(decode_header(encoded))
        check(rebuilt, hdr)

    def test_empty_header_encode(self):
        empty = Header()
        self.assertEqual(empty.encode(), '')

    def test_header_ctor_default_args(self):
        check = self.ndiffAssertEqual
        hdr = Header()
        check(hdr, '')
        hdr.append('foo', Charset('iso-8859-1'))
        check(hdr, 'foo')

    def test_explicit_maxlinelen(self):
        check = self.ndiffAssertEqual
        text = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # No header name, default line length.
        hdr = Header(text)
        check(hdr.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        check(str(hdr), text)
        # With a header name the expected fold point is earlier.
        hdr = Header(text, header_name='Subject')
        check(hdr.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        check(str(hdr), text)
        # A large explicit maxlinelen is expected to leave the text unfolded.
        hdr = Header(text, maxlinelen=1024, header_name='Subject')
        check(hdr.encode(), text)
        check(str(hdr), text)

    def test_quopri_splittable(self):
        check = self.ndiffAssertEqual
        text = 'xxxx ' * 20
        hdr = Header(charset='iso-8859-1', maxlinelen=20)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        check(text, str(make_header(decode_header(encoded))))
        # Same text again with twice the line length.
        hdr = Header(charset='iso-8859-1', maxlinelen=40)
        hdr.append('xxxx ' * 20)
        encoded = hdr.encode()
        check(encoded, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_base64_splittable(self):
        check = self.ndiffAssertEqual
        text = 'xxxx ' * 20
        hdr = Header(charset='koi8-r', maxlinelen=20)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        check(text, str(make_header(decode_header(encoded))))
        # Same text again with twice the line length.
        hdr = Header(charset='koi8-r', maxlinelen=40)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_us_ascii_header(self):
        text = 'hello'
        chunks = decode_header(text)
        self.assertEqual(chunks, [('hello', None)])
        rebuilt = make_header(chunks)
        self.assertEqual(text, rebuilt.encode())

    def test_string_charset(self):
        # The charset may be given by name rather than as a Charset object.
        hdr = Header()
        hdr.append('hello', 'iso-8859-1')
        self.assertEqual(hdr, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # The first value is expected in the 'q' form, the second in 'b'.
        hdr = Header('p\xf6stal', 'utf-8')
        self.assertEqual(hdr.encode(), '=?utf-8?q?p=C3=B6stal?=')
        hdr = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        self.assertEqual(hdr.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Without an errors argument the stray \x96 byte is rejected.
        with self.assertRaises(UnicodeError):
            Header(raw)
        hdr = Header()
        with self.assertRaises(UnicodeError):
            hdr.append(raw)
        # With errors='replace' the result matches a replacing utf-8 decode.
        replaced = raw.decode('utf-8', 'replace')
        self.assertEqual(str(Header(raw, errors='replace')), replaced)
        hdr.append(raw, errors='replace')
        self.assertEqual(str(hdr), replaced)

    def test_escaped_8bit_header(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        escaped = raw.decode('ascii', 'surrogateescape')
        hdr = Header(escaped, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(hdr),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(hdr),
                         [(raw, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        hdr = Header(raw, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(hdr),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(hdr),
                         [(raw, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        hdr = Header(raw, charset=email.charset.UNKNOWN8BIT)
        # Round-trip through decode_header()/make_header().
        rebuilt = email.header.make_header(email.header.decode_header(hdr))
        self.assertEqual(
            str(rebuilt),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(rebuilt),
                         [(raw, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        hdr = Header('test')
        decoded = email.header.decode_header(hdr)
        # Mutating the returned list must leave the header itself untouched.
        decoded.append(('ascii', 'test2'))
        self.assertEqual(str(hdr), 'test')

    def test_encoded_adjacent_nonencoded(self):
        hdr = Header()
        hdr.append('hello', 'iso-8859-1')
        hdr.append('world')
        encoded = hdr.encode()
        self.assertEqual(encoded, '=?iso-8859-1?q?hello?= world')
        # Decoding and rebuilding must give the same encoded form again.
        rebuilt = make_header(decode_header(encoded))
        self.assertEqual(rebuilt.encode(), encoded)

    def test_whitespace_keeper(self):
        raw = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        rebuilt = make_header(chunks)
        self.assertEqual(
            rebuilt.encode(),
            'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raw = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        with self.assertRaises(errors.HeaderParseError):
            decode_header(raw)

    def test_shift_jis_charset(self):
        # The expected encoded word names iso-2022-jp, not shift_jis.
        hdr = Header('文', charset='shift_jis')
        self.assertEqual(hdr.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        parsed = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(parsed), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        message = Message()
        message['SomeHeader'] = '   value with leading ws'
        self.assertEqual(str(message),
                         "SomeHeader:    value with leading ws\n\n")

    def test_whitespace_header(self):
        blank = Header(' ')
        self.assertEqual(blank.encode(), ' ')
4994
4995
4996
4997# Test RFC 2231 header parameters (en/de)coding
4998class TestRFC2231(TestEmailBase):
4999
5000    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5001    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5002    def test_get_param(self):
5003        eq = self.assertEqual
5004        msg = self._msgobj('msg_29.txt')
5005        eq(msg.get_param('title'),
5006           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5007        eq(msg.get_param('title', unquote=False),
5008           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
5009
5010    def test_set_param(self):
5011        eq = self.ndiffAssertEqual
5012        msg = Message()
5013        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5014                      charset='us-ascii')
5015        eq(msg.get_param('title'),
5016           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
5017        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5018                      charset='us-ascii', language='en')
5019        eq(msg.get_param('title'),
5020           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5021        msg = self._msgobj('msg_01.txt')
5022        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5023                      charset='us-ascii', language='en')
5024        eq(msg.as_string(maxheaderlen=78), """\
5025Return-Path: <bbb@zzz.org>
5026Delivered-To: bbb@zzz.org
5027Received: by mail.zzz.org (Postfix, from userid 889)
5028\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5029MIME-Version: 1.0
5030Content-Transfer-Encoding: 7bit
5031Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5032From: bbb@ddd.com (John X. Doe)
5033To: bbb@zzz.org
5034Subject: This is a test message
5035Date: Fri, 4 May 2001 14:05:44 -0400
5036Content-Type: text/plain; charset=us-ascii;
5037 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5038
5039
5040Hi,
5041
5042Do you like this message?
5043
5044-Me
5045""")
5046
5047    def test_set_param_requote(self):
5048        msg = Message()
5049        msg.set_param('title', 'foo')
5050        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
5051        msg.set_param('title', 'bar', requote=False)
5052        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
5053        # tspecial is still quoted.
5054        msg.set_param('title', "(bar)bell", requote=False)
5055        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
5056
5057    def test_del_param(self):
5058        eq = self.ndiffAssertEqual
5059        msg = self._msgobj('msg_01.txt')
5060        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
5061        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5062            charset='us-ascii', language='en')
5063        msg.del_param('foo', header='Content-Type')
5064        eq(msg.as_string(maxheaderlen=78), """\
5065Return-Path: <bbb@zzz.org>
5066Delivered-To: bbb@zzz.org
5067Received: by mail.zzz.org (Postfix, from userid 889)
5068\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5069MIME-Version: 1.0
5070Content-Transfer-Encoding: 7bit
5071Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5072From: bbb@ddd.com (John X. Doe)
5073To: bbb@zzz.org
5074Subject: This is a test message
5075Date: Fri, 4 May 2001 14:05:44 -0400
5076Content-Type: text/plain; charset="us-ascii";
5077 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5078
5079
5080Hi,
5081
5082Do you like this message?
5083
5084-Me
5085""")
5086
5087    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
5088    # I changed the charset name, though, because the one in the file isn't
5089    # a legal charset name.  Should add a test for an illegal charset.
5090    def test_rfc2231_get_content_charset(self):
5091        eq = self.assertEqual
5092        msg = self._msgobj('msg_32.txt')
5093        eq(msg.get_content_charset(), 'us-ascii')
5094
5095    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
5096    def test_rfc2231_parse_rfc_quoting(self):
5097        m = textwrap.dedent('''\
5098            Content-Disposition: inline;
5099            \tfilename*0*=''This%20is%20even%20more%20;
5100            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
5101            \tfilename*2="is it not.pdf"
5102
5103            ''')
5104        msg = email.message_from_string(m)
5105        self.assertEqual(msg.get_filename(),
5106                         'This is even more ***fun*** is it not.pdf')
5107        self.assertEqual(m, msg.as_string())
5108
5109    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5110    def test_rfc2231_parse_extra_quoting(self):
5111        m = textwrap.dedent('''\
5112            Content-Disposition: inline;
5113            \tfilename*0*="''This%20is%20even%20more%20";
5114            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5115            \tfilename*2="is it not.pdf"
5116
5117            ''')
5118        msg = email.message_from_string(m)
5119        self.assertEqual(msg.get_filename(),
5120                         'This is even more ***fun*** is it not.pdf')
5121        self.assertEqual(m, msg.as_string())
5122
5123    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
5124    # but new test uses *0* because otherwise lang/charset is not valid.
5125    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
5126    def test_rfc2231_no_language_or_charset(self):
5127        m = '''\
5128Content-Transfer-Encoding: 8bit
5129Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
5130Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
5131
5132'''
5133        msg = email.message_from_string(m)
5134        param = msg.get_param('NAME')
5135        self.assertNotIsInstance(param, tuple)
5136        self.assertEqual(
5137            param,
5138            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
5139
5140    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
5141    def test_rfc2231_no_language_or_charset_in_filename(self):
5142        m = '''\
5143Content-Disposition: inline;
5144\tfilename*0*="''This%20is%20even%20more%20";
5145\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5146\tfilename*2="is it not.pdf"
5147
5148'''
5149        msg = email.message_from_string(m)
5150        self.assertEqual(msg.get_filename(),
5151                         'This is even more ***fun*** is it not.pdf')
5152
5153    # Duplicate of previous test?
5154    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
5155        m = '''\
5156Content-Disposition: inline;
5157\tfilename*0*="''This%20is%20even%20more%20";
5158\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5159\tfilename*2="is it not.pdf"
5160
5161'''
5162        msg = email.message_from_string(m)
5163        self.assertEqual(msg.get_filename(),
5164                         'This is even more ***fun*** is it not.pdf')
5165
5166    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
5167    # but the test below is wrong (the first part should be decoded).
5168    def test_rfc2231_partly_encoded(self):
5169        m = '''\
5170Content-Disposition: inline;
5171\tfilename*0="''This%20is%20even%20more%20";
5172\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5173\tfilename*2="is it not.pdf"
5174
5175'''
5176        msg = email.message_from_string(m)
5177        self.assertEqual(
5178            msg.get_filename(),
5179            'This%20is%20even%20more%20***fun*** is it not.pdf')
5180
5181    def test_rfc2231_partly_nonencoded(self):
5182        m = '''\
5183Content-Disposition: inline;
5184\tfilename*0="This%20is%20even%20more%20";
5185\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
5186\tfilename*2="is it not.pdf"
5187
5188'''
5189        msg = email.message_from_string(m)
5190        self.assertEqual(
5191            msg.get_filename(),
5192            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
5193
5194    def test_rfc2231_no_language_or_charset_in_boundary(self):
5195        m = '''\
5196Content-Type: multipart/alternative;
5197\tboundary*0*="''This%20is%20even%20more%20";
5198\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
5199\tboundary*2="is it not.pdf"
5200
5201'''
5202        msg = email.message_from_string(m)
5203        self.assertEqual(msg.get_boundary(),
5204                         'This is even more ***fun*** is it not.pdf')
5205
5206    def test_rfc2231_no_language_or_charset_in_charset(self):
5207        # This is a nonsensical charset value, but tests the code anyway
5208        m = '''\
5209Content-Type: text/plain;
5210\tcharset*0*="This%20is%20even%20more%20";
5211\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
5212\tcharset*2="is it not.pdf"
5213
5214'''
5215        msg = email.message_from_string(m)
5216        self.assertEqual(msg.get_content_charset(),
5217                         'this is even more ***fun*** is it not.pdf')
5218
5219    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
5220    def test_rfc2231_bad_encoding_in_filename(self):
5221        m = '''\
5222Content-Disposition: inline;
5223\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
5224\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5225\tfilename*2="is it not.pdf"
5226
5227'''
5228        msg = email.message_from_string(m)
5229        self.assertEqual(msg.get_filename(),
5230                         'This is even more ***fun*** is it not.pdf')
5231
5232    def test_rfc2231_bad_encoding_in_charset(self):
5233        m = """\
5234Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
5235
5236"""
5237        msg = email.message_from_string(m)
5238        # This should return None because non-ascii characters in the charset
5239        # are not allowed.
5240        self.assertEqual(msg.get_content_charset(), None)
5241
5242    def test_rfc2231_bad_character_in_charset(self):
5243        m = """\
5244Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
5245
5246"""
5247        msg = email.message_from_string(m)
5248        # This should return None because non-ascii characters in the charset
5249        # are not allowed.
5250        self.assertEqual(msg.get_content_charset(), None)
5251
5252    def test_rfc2231_bad_character_in_filename(self):
5253        m = '''\
5254Content-Disposition: inline;
5255\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
5256\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5257\tfilename*2*="is it not.pdf%E2"
5258
5259'''
5260        msg = email.message_from_string(m)
5261        self.assertEqual(msg.get_filename(),
5262                         'This is even more ***fun*** is it not.pdf\ufffd')
5263
5264    def test_rfc2231_unknown_encoding(self):
5265        m = """\
5266Content-Transfer-Encoding: 8bit
5267Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
5268
5269"""
5270        msg = email.message_from_string(m)
5271        self.assertEqual(msg.get_filename(), 'myfile.txt')
5272
5273    def test_rfc2231_single_tick_in_filename_extended(self):
5274        eq = self.assertEqual
5275        m = """\
5276Content-Type: application/x-foo;
5277\tname*0*=\"Frank's\"; name*1*=\" Document\"
5278
5279"""
5280        msg = email.message_from_string(m)
5281        charset, language, s = msg.get_param('name')
5282        eq(charset, None)
5283        eq(language, None)
5284        eq(s, "Frank's Document")
5285
5286    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5287    def test_rfc2231_single_tick_in_filename(self):
5288        m = """\
5289Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
5290
5291"""
5292        msg = email.message_from_string(m)
5293        param = msg.get_param('name')
5294        self.assertNotIsInstance(param, tuple)
5295        self.assertEqual(param, "Frank's Document")
5296
5297    def test_rfc2231_missing_tick(self):
5298        m = '''\
5299Content-Disposition: inline;
5300\tfilename*0*="'This%20is%20broken";
5301'''
5302        msg = email.message_from_string(m)
5303        self.assertEqual(
5304            msg.get_filename(),
5305            "'This is broken")
5306
5307    def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
5308        m = '''\
5309Content-Disposition: inline;
5310\tfilename*0*="'This%20is%E2broken";
5311'''
5312        msg = email.message_from_string(m)
5313        self.assertEqual(
5314            msg.get_filename(),
5315            "'This is\ufffdbroken")
5316
5317    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
5318    def test_rfc2231_tick_attack_extended(self):
5319        eq = self.assertEqual
5320        m = """\
5321Content-Type: application/x-foo;
5322\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
5323
5324"""
5325        msg = email.message_from_string(m)
5326        charset, language, s = msg.get_param('name')
5327        eq(charset, 'us-ascii')
5328        eq(language, 'en-us')
5329        eq(s, "Frank's Document")
5330
5331    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
5332    def test_rfc2231_tick_attack(self):
5333        m = """\
5334Content-Type: application/x-foo;
5335\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
5336
5337"""
5338        msg = email.message_from_string(m)
5339        param = msg.get_param('name')
5340        self.assertNotIsInstance(param, tuple)
5341        self.assertEqual(param, "us-ascii'en-us'Frank's Document")
5342
5343    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
5344    def test_rfc2231_no_extended_values(self):
5345        eq = self.assertEqual
5346        m = """\
5347Content-Type: application/x-foo; name=\"Frank's Document\"
5348
5349"""
5350        msg = email.message_from_string(m)
5351        eq(msg.get_param('name'), "Frank's Document")
5352
5353    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
5354    def test_rfc2231_encoded_then_unencoded_segments(self):
5355        eq = self.assertEqual
5356        m = """\
5357Content-Type: application/x-foo;
5358\tname*0*=\"us-ascii'en-us'My\";
5359\tname*1=\" Document\";
5360\tname*2*=\" For You\"
5361
5362"""
5363        msg = email.message_from_string(m)
5364        charset, language, s = msg.get_param('name')
5365        eq(charset, 'us-ascii')
5366        eq(language, 'en-us')
5367        eq(s, 'My Document For You')
5368
5369    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
5370    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
5371    def test_rfc2231_unencoded_then_encoded_segments(self):
5372        eq = self.assertEqual
5373        m = """\
5374Content-Type: application/x-foo;
5375\tname*0=\"us-ascii'en-us'My\";
5376\tname*1*=\" Document\";
5377\tname*2*=\" For You\"
5378
5379"""
5380        msg = email.message_from_string(m)
5381        charset, language, s = msg.get_param('name')
5382        eq(charset, 'us-ascii')
5383        eq(language, 'en-us')
5384        eq(s, 'My Document For You')
5385
5386    def test_should_not_hang_on_invalid_ew_messages(self):
5387        messages = ["""From: user@host.com
5388To: user@host.com
5389Bad-Header:
5390 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
5391 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
5392 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
5393
5394Hello!
5395""", """From: ����� �������� <xxx@xxx>
5396To: "xxx" <xxx@xxx>
5397Subject:   ��� ���������� ����� ����� � ��������� �� ����
5398MIME-Version: 1.0
5399Content-Type: text/plain; charset="windows-1251";
5400Content-Transfer-Encoding: 8bit
5401
5402�� ����� � ���� ������ ��� ��������
5403"""]
5404        for m in messages:
5405            with self.subTest(m=m):
5406                msg = email.message_from_string(m)
5407
5408
5409# Tests to ensure that signed parts of an email are completely preserved, as
5410# required by RFC1847 section 2.1.  Note that these are incomplete, because the
5411# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Matches the first MIME part of a multipart text.  Group 1 is the
    # boundary string following the opening '--'; group 2 is the part's
    # content, up to (but not including) the newline that precedes the next
    # line consisting of '--' plus the same boundary.  Compiled once here
    # rather than on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (original_text, parsed_message) for the named data file.
        with openfile(filename) as fp:
            original = fp.read()
        # The file is fully read, so parsing does not need it kept open.
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Assert that the first MIME part of `result` is identical to the
        # first MIME part of `original`.
        in_match = self._first_part_re.search(original)
        out_match = self._first_part_re.search(result)
        # Report a readable failure instead of an AttributeError on None
        # when either text has no recognisable first part.
        self.assertIsNotNone(in_match, 'no MIME part found in original text')
        self.assertIsNotNone(out_match,
                             'no MIME part found in generated text')
        self.assertEqual(out_match.group(2), in_match.group(2))

    def test_long_headers_as_string(self):
        # Flatten with as_string() using its default arguments.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Flatten with as_string() and an explicit maxheaderlen of 60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Flatten through a Generator writing into an in-memory buffer.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5444
5445
5446
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
5449