• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3# email package unit tests
4
5import re
6import time
7import base64
8import unittest
9import textwrap
10
11from io import StringIO, BytesIO
12from itertools import chain
13from random import choice
14from socket import getfqdn
15from threading import Thread
16
17import email
18import email.policy
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator, BytesGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email.mime.nonmultipart import MIMENonMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
40from test.support import unlink, start_threads
41from test.test_email import openfile, TestEmailBase
42
43# These imports are documented to work, but we are testing them using a
44# different path, so we import them here just to make sure they are importable.
45from email.parser import FeedParser, BytesFeedParser
46
47NL = '\n'
48EMPTYSTRING = ''
49SPACE = ' '
50
51
52# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Tests that need an on-disk fixture load it through ``self._msgobj`` or
    ``openfile``; the remaining tests build their messages inline.
    """

    # NOTE: per-test documentation is written as comments rather than
    # docstrings so that unittest keeps reporting tests by method name.

    def test_get_all(self):
        # get_all() gives the list of 'cc' values found in the fixture and
        # falls back to the supplied default for the 'xx' lookup.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        # Round-trip a Charset through set_charset()/get_charset() and check
        # which MIME headers appear, disappear, or are left untouched.
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # The charset passed to set_payload() is reported by get_charset().
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        # Bytes payload + utf-8 charset: stored base64-encoded, and decoding
        # returns the original bytes.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], 'base64')
        self.assertEqual(msg.get_payload(decode=True), data)
        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        # Same data supplied as str, with the charset's body encoding
        # switched off: the CTE becomes 8bit.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data.decode('utf-8'), charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        # As above, but the payload is handed over as bytes.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_to_list(self):
        # An empty list is a valid payload and is returned as-is.
        msg = Message()
        msg.set_payload([])
        self.assertEqual(msg.get_payload(), [])

    def test_attach_when_payload_is_string(self):
        # attach() must refuse to add a part once the payload is a string,
        # even though the Content-Type claims multipart.
        msg = Message()
        msg['Content-Type'] = 'multipart/mixed'
        msg.set_payload('string payload')
        sub_msg = MIMEMessage(Message())
        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
                               msg.attach, sub_msg)

    def test_get_charsets(self):
        # get_charsets() reports one entry per (sub)part; its optional
        # argument replaces None for parts without a charset.
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        # get_filename() on the subparts of two fixture messages.
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        # NOTE(review): judging by the test name, msg_44.txt carries the
        # file name in a 'name' parameter -- confirm against the fixture.
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        # set_boundary() must keep the Content-Type header at its original
        # position and must fail when there is no such header.
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        # get_payload(decode=True) for each transfer encoding in msg_10.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        # Every accepted spelling of the uuencode CTE must decode the
        # payload; undecodable data is handed back unchanged.
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Message.__setitem__ appends instead of replacing, and header
            # lookup returns the first match.  Without the delete, every
            # iteration after the first would silently re-test 'x-uuencode'.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            with self.subTest(cte=cte):
                eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        # Indexing into the payload of a non-multipart message is an error.
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        # DecodedGenerator output for msg_07.txt must equal msg_17.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        # str(msg) reproduces the source text; with unixfrom=True an extra
        # leading 'From ' line is emitted in front of that same text.
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        # as_string(policy=...) must match a Generator using that policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_as_bytes(self):
        # Bytes counterpart of test_as_string.
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        # as_bytes(policy=...) must match a BytesGenerator using that policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        # A parameter with no '=value' part reads back as the empty string.
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        # A value-less 'filename' parameter yields '' rather than None.
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        # get_params() on an arbitrary header: plain, value-less and quoted
        # parameters.
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # The message has no Content-Type header, so the default lookup
        # returns None.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        # NOTE(review): the header *value* itself starts with the text
        # 'Content-Type: ' -- presumably deliberate for this liberal-parsing
        # test, so it is left untouched.
        msg['Content-Type'] = (
            'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"')
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        # get_param() for present, absent, value-less and quoted parameters.
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        # Semicolons inside a quoted value must not split the parameter.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        # Escaped quotes inside continued (bar*0 / bar*1) parameter values;
        # the two inputs are the same header text written with different
        # Python string quoting.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        # 'in' is case-insensitive but requires the whole header name.
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        # set_param() on the default header and on a named header, read
        # back both unquoted and quoted.
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        # Delete a parameter, then add it back: it reappears at the end.
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        # Deleting an unknown parameter leaves the header unchanged.
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        # set_type() rejects a value without a slash and keeps existing
        # parameters when the type is changed.
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    # get_content_type(): default, overridden default, and fixture messages.

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    # get_content_maintype(): same matrix as above.

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    # get_content_subtype(): same matrix as above.

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type without a slash falls back to 'text'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type without a slash falls back to 'plain'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        # replace_header() keeps header order, only touches the first
        # occurrence of a duplicated name, and raises for unknown names.
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_get_content_disposition(self):
        # None without the header; otherwise the lower-cased disposition
        # without its parameters.
        msg = Message()
        self.assertIsNone(msg.get_content_disposition())
        msg.add_header('Content-Disposition', 'attachment',
                       filename='random.avi')
        self.assertEqual(msg.get_content_disposition(), 'attachment')
        msg.replace_header('Content-Disposition', 'inline')
        self.assertEqual(msg.get_content_disposition(), 'inline')
        msg.replace_header('Content-Disposition', 'InlinE')
        self.assertEqual(msg.get_content_disposition(), 'inline')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        # A malformed base64 payload still decodes to the expected bytes
        # and records an InvalidBase64CharactersDefect.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        # A non-ASCII parameter value is emitted in 'filename*=' form with
        # utf-8 percent-encoding.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        # A (charset, language, value) triple selects the charset used for
        # the percent-encoding.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        # ASCII value containing brackets and a space: emitted quoted.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        # Non-ASCII value containing brackets and a space: everything is
        # percent-encoded.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        # A bytes quoted-printable payload decodes to the same bytes under
        # either declared charset.
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        # Same check for a bytes base64 payload.
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        # Same check for a bytes uuencoded payload, across every accepted
        # spelling of the transfer encoding.
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # Build the message from one concatenated literal; calling
                # str() on a tuple of literals would format the tuple's repr.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset %s and encoding %s.')
                    % (charset, encoding))

    def test_add_header_with_name_only_param(self):
        # A None parameter value produces a bare parameter name, with the
        # underscore in the keyword turned into a dash.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        # A None header value is stored as the empty string.
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embedded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embedded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
758
759
760# Test the email.encoders module
761class TestEncoders(unittest.TestCase):
762
763    def test_EncodersEncode_base64(self):
764        with openfile('PyBanner048.gif', 'rb') as fp:
765            bindata = fp.read()
766        mimed = email.mime.image.MIMEImage(bindata)
767        base64ed = mimed.get_payload()
768        # the transfer-encoded body lines should all be <=76 characters
769        lines = base64ed.split('\n')
770        self.assertLessEqual(max([ len(x) for x in lines ]), 76)
771
772    def test_encode_empty_payload(self):
773        eq = self.assertEqual
774        msg = Message()
775        msg.set_charset('us-ascii')
776        eq(msg['content-transfer-encoding'], '7bit')
777
778    def test_default_cte(self):
779        eq = self.assertEqual
780        # 7bit data and the default us-ascii _charset
781        msg = MIMEText('hello world')
782        eq(msg['content-transfer-encoding'], '7bit')
783        # Similar, but with 8bit data
784        msg = MIMEText('hello \xf8 world')
785        eq(msg['content-transfer-encoding'], 'base64')
786        # And now with a different charset
787        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
788        eq(msg['content-transfer-encoding'], 'quoted-printable')
789
790    def test_encode7or8bit(self):
791        # Make sure a charset whose input character set is 8bit but
792        # whose output character set is 7bit gets a transfer-encoding
793        # of 7bit.
794        eq = self.assertEqual
795        msg = MIMEText('文\n', _charset='euc-jp')
796        eq(msg['content-transfer-encoding'], '7bit')
797        eq(msg.as_string(), textwrap.dedent("""\
798            MIME-Version: 1.0
799            Content-Type: text/plain; charset="iso-2022-jp"
800            Content-Transfer-Encoding: 7bit
801
802            \x1b$BJ8\x1b(B
803            """))
804
805    def test_qp_encode_latin1(self):
806        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
807        self.assertEqual(str(msg), textwrap.dedent("""\
808            MIME-Version: 1.0
809            Content-Type: text/text; charset="iso-8859-1"
810            Content-Transfer-Encoding: quoted-printable
811
812            =E1=F6
813            """))
814
815    def test_qp_encode_non_latin1(self):
816        # Issue 16948
817        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
818        self.assertEqual(str(msg), textwrap.dedent("""\
819            MIME-Version: 1.0
820            Content-Type: text/text; charset="iso-8859-2"
821            Content-Transfer-Encoding: quoted-printable
822
823            =BF
824            """))
825
826
827# Test long header wrapping
828class TestLongHeaders(TestEmailBase):
829
830    maxDiff = None
831
832    def test_split_long_continuation(self):
833        eq = self.ndiffAssertEqual
834        msg = email.message_from_string("""\
835Subject: bug demonstration
836\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
837\tmore text
838
839test
840""")
841        sfp = StringIO()
842        g = Generator(sfp)
843        g.flatten(msg)
844        eq(sfp.getvalue(), """\
845Subject: bug demonstration
846\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
847\tmore text
848
849test
850""")
851
852    def test_another_long_almost_unsplittable_header(self):
853        eq = self.ndiffAssertEqual
854        hstr = """\
855bug demonstration
856\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
857\tmore text"""
858        h = Header(hstr, continuation_ws='\t')
859        eq(h.encode(), """\
860bug demonstration
861\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
862\tmore text""")
863        h = Header(hstr.replace('\t', ' '))
864        eq(h.encode(), """\
865bug demonstration
866 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
867 more text""")
868
869    def test_long_nonstring(self):
870        eq = self.ndiffAssertEqual
871        g = Charset("iso-8859-1")
872        cz = Charset("iso-8859-2")
873        utf8 = Charset("utf-8")
874        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
875                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
876                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
877                  b'bef\xf6rdert. ')
878        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
879                   b'd\xf9vtipu.. ')
880        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
881                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
882                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
883                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
884                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
885                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
886                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
887                     '\u3044\u307e\u3059\u3002')
888        h = Header(g_head, g, header_name='Subject')
889        h.append(cz_head, cz)
890        h.append(utf8_head, utf8)
891        msg = Message()
892        msg['Subject'] = h
893        sfp = StringIO()
894        g = Generator(sfp)
895        g.flatten(msg)
896        eq(sfp.getvalue(), """\
897Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
898 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
899 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
900 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
901 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
902 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
903 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
904 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
905 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
906 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
907 =?utf-8?b?44CC?=
908
909""")
910        eq(h.encode(maxlinelen=76), """\
911=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
912 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
913 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
914 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
915 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
916 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
917 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
918 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
919 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
920 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
921 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
922
923    def test_long_header_encode(self):
924        eq = self.ndiffAssertEqual
925        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
926                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
927                   header_name='X-Foobar-Spoink-Defrobnit')
928        eq(h.encode(), '''\
929wasnipoop; giraffes="very-long-necked-animals";
930 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
931
932    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
933        eq = self.ndiffAssertEqual
934        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
935                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
936                   header_name='X-Foobar-Spoink-Defrobnit',
937                   continuation_ws='\t')
938        eq(h.encode(), '''\
939wasnipoop; giraffes="very-long-necked-animals";
940 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
941
942    def test_long_header_encode_with_tab_continuation(self):
943        eq = self.ndiffAssertEqual
944        h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
945                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
946                   header_name='X-Foobar-Spoink-Defrobnit',
947                   continuation_ws='\t')
948        eq(h.encode(), '''\
949wasnipoop; giraffes="very-long-necked-animals";
950\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
951
952    def test_header_encode_with_different_output_charset(self):
953        h = Header('文', 'euc-jp')
954        self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
955
956    def test_long_header_encode_with_different_output_charset(self):
957        h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
958            b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
959            b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
960            b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
961        res = """\
962=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
963 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
964        self.assertEqual(h.encode(), res)
965
966    def test_header_splitter(self):
967        eq = self.ndiffAssertEqual
968        msg = MIMEText('')
969        # It'd be great if we could use add_header() here, but that doesn't
970        # guarantee an order of the parameters.
971        msg['X-Foobar-Spoink-Defrobnit'] = (
972            'wasnipoop; giraffes="very-long-necked-animals"; '
973            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
974        sfp = StringIO()
975        g = Generator(sfp)
976        g.flatten(msg)
977        eq(sfp.getvalue(), '''\
978Content-Type: text/plain; charset="us-ascii"
979MIME-Version: 1.0
980Content-Transfer-Encoding: 7bit
981X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
982 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
983
984''')
985
986    def test_no_semis_header_splitter(self):
987        eq = self.ndiffAssertEqual
988        msg = Message()
989        msg['From'] = 'test@dom.ain'
990        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
991        msg.set_payload('Test')
992        sfp = StringIO()
993        g = Generator(sfp)
994        g.flatten(msg)
995        eq(sfp.getvalue(), """\
996From: test@dom.ain
997References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
998 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
999
1000Test""")
1001
1002    def test_last_split_chunk_does_not_fit(self):
1003        eq = self.ndiffAssertEqual
1004        h = Header('Subject: the first part of this is short, but_the_second'
1005            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
1006            '_all_by_itself')
1007        eq(h.encode(), """\
1008Subject: the first part of this is short,
1009 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
1010
1011    def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
1012        eq = self.ndiffAssertEqual
1013        h = Header(', but_the_second'
1014            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
1015            '_all_by_itself')
1016        eq(h.encode(), """\
1017,
1018 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
1019
1020    def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
1021        eq = self.ndiffAssertEqual
1022        h = Header(', , but_the_second'
1023            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
1024            '_all_by_itself')
1025        eq(h.encode(), """\
1026, ,
1027 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
1028
1029    def test_trailing_splitable_on_overlong_unsplitable(self):
1030        eq = self.ndiffAssertEqual
1031        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1032            'be_on_a_line_all_by_itself;')
1033        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
1034            "be_on_a_line_all_by_itself;")
1035
1036    def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
1037        eq = self.ndiffAssertEqual
1038        h = Header('; '
1039            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1040            'be_on_a_line_all_by_itself; ')
1041        eq(h.encode(), """\
1042;
1043 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1044
1045    def test_long_header_with_multiple_sequential_split_chars(self):
1046        eq = self.ndiffAssertEqual
1047        h = Header('This is a long line that has two whitespaces  in a row.  '
1048            'This used to cause truncation of the header when folded')
1049        eq(h.encode(), """\
1050This is a long line that has two whitespaces  in a row.  This used to cause
1051 truncation of the header when folded""")
1052
1053    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
1054        eq = self.ndiffAssertEqual
1055        h = Header('thisverylongheaderhas;semicolons;and,commas,but'
1056            'they;arenotlegal;fold,points')
1057        eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
1058                        "arenotlegal;fold,points")
1059
1060    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
1061        eq = self.ndiffAssertEqual
1062        h = Header('this is a  test where we need to have more than one line '
1063            'before; our final line that is just too big to fit;; '
1064            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1065            'be_on_a_line_all_by_itself;')
1066        eq(h.encode(), """\
1067this is a  test where we need to have more than one line before;
1068 our final line that is just too big to fit;;
1069 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
1070
1071    def test_overlong_last_part_followed_by_split_point(self):
1072        eq = self.ndiffAssertEqual
1073        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1074            'be_on_a_line_all_by_itself ')
1075        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
1076                        "should_be_on_a_line_all_by_itself ")
1077
1078    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
1079        eq = self.ndiffAssertEqual
1080        h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
1081            'before_our_final_line_; ; '
1082            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1083            'be_on_a_line_all_by_itself; ')
1084        eq(h.encode(), """\
1085this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
1086 ;
1087 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1088
1089    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
1090        eq = self.ndiffAssertEqual
1091        h = Header('this is a test where we need to have more than one line '
1092            'before our final line; ; '
1093            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1094            'be_on_a_line_all_by_itself; ')
1095        eq(h.encode(), """\
1096this is a test where we need to have more than one line before our final line;
1097 ;
1098 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1099
1100    def test_long_header_with_whitespace_runs(self):
1101        eq = self.ndiffAssertEqual
1102        msg = Message()
1103        msg['From'] = 'test@dom.ain'
1104        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
1105        msg.set_payload('Test')
1106        sfp = StringIO()
1107        g = Generator(sfp)
1108        g.flatten(msg)
1109        eq(sfp.getvalue(), """\
1110From: test@dom.ain
1111References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
1112   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
1113   <foo@dom.ain>   <foo@dom.ain>\x20\x20
1114
1115Test""")
1116
1117    def test_long_run_with_semi_header_splitter(self):
1118        eq = self.ndiffAssertEqual
1119        msg = Message()
1120        msg['From'] = 'test@dom.ain'
1121        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
1122        msg.set_payload('Test')
1123        sfp = StringIO()
1124        g = Generator(sfp)
1125        g.flatten(msg)
1126        eq(sfp.getvalue(), """\
1127From: test@dom.ain
1128References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1129 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1130 <foo@dom.ain>; abc
1131
1132Test""")
1133
1134    def test_splitter_split_on_punctuation_only_if_fws(self):
1135        eq = self.ndiffAssertEqual
1136        msg = Message()
1137        msg['From'] = 'test@dom.ain'
1138        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1139            'they;arenotlegal;fold,points')
1140        msg.set_payload('Test')
1141        sfp = StringIO()
1142        g = Generator(sfp)
1143        g.flatten(msg)
1144        # XXX the space after the header should not be there.
1145        eq(sfp.getvalue(), """\
1146From: test@dom.ain
1147References:\x20
1148 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1149
1150Test""")
1151
1152    def test_no_split_long_header(self):
1153        eq = self.ndiffAssertEqual
1154        hstr = 'References: ' + 'x' * 80
1155        h = Header(hstr)
1156        # These come on two lines because Headers are really field value
1157        # classes and don't really know about their field names.
1158        eq(h.encode(), """\
1159References:
1160 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1161        h = Header('x' * 80)
1162        eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
1163
1164    def test_splitting_multiple_long_lines(self):
1165        eq = self.ndiffAssertEqual
1166        hstr = """\
1167from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1168\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1169\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1170"""
1171        h = Header(hstr, continuation_ws='\t')
1172        eq(h.encode(), """\
1173from babylon.socal-raves.org (localhost [127.0.0.1]);
1174 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1175 for <mailman-admin@babylon.socal-raves.org>;
1176 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1177\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1178 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1179 for <mailman-admin@babylon.socal-raves.org>;
1180 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1181\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1182 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1183 for <mailman-admin@babylon.socal-raves.org>;
1184 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1185
1186    def test_splitting_first_line_only_is_long(self):
1187        eq = self.ndiffAssertEqual
1188        hstr = """\
1189from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1190\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1191\tid 17k4h5-00034i-00
1192\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1193        h = Header(hstr, maxlinelen=78, header_name='Received',
1194                   continuation_ws='\t')
1195        eq(h.encode(), """\
1196from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1197 helo=cthulhu.gerg.ca)
1198\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1199\tid 17k4h5-00034i-00
1200\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1201
1202    def test_long_8bit_header(self):
1203        eq = self.ndiffAssertEqual
1204        msg = Message()
1205        h = Header('Britische Regierung gibt', 'iso-8859-1',
1206                    header_name='Subject')
1207        h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
1208        eq(h.encode(maxlinelen=76), """\
1209=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1210 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
1211        msg['Subject'] = h
1212        eq(msg.as_string(maxheaderlen=76), """\
1213Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1214 =?iso-8859-1?q?hore-Windkraftprojekte?=
1215
1216""")
1217        eq(msg.as_string(maxheaderlen=0), """\
1218Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
1219
1220""")
1221
1222    def test_long_8bit_header_no_charset(self):
1223        eq = self.ndiffAssertEqual
1224        msg = Message()
1225        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1226                         'f\xfcr Offshore-Windkraftprojekte '
1227                         '<a-very-long-address@example.com>')
1228        msg['Reply-To'] = header_string
1229        eq(msg.as_string(maxheaderlen=78), """\
1230Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1231 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1232
1233""")
1234        msg = Message()
1235        msg['Reply-To'] = Header(header_string,
1236                                 header_name='Reply-To')
1237        eq(msg.as_string(maxheaderlen=78), """\
1238Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1239 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1240
1241""")
1242
1243    def test_long_to_header(self):
1244        eq = self.ndiffAssertEqual
1245        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
1246              '<someone@eecs.umich.edu>, '
1247              '"Someone Test #B" <someone@umich.edu>, '
1248              '"Someone Test #C" <someone@eecs.umich.edu>, '
1249              '"Someone Test #D" <someone@eecs.umich.edu>')
1250        msg = Message()
1251        msg['To'] = to
1252        eq(msg.as_string(maxheaderlen=78), '''\
1253To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
1254 "Someone Test #B" <someone@umich.edu>,
1255 "Someone Test #C" <someone@eecs.umich.edu>,
1256 "Someone Test #D" <someone@eecs.umich.edu>
1257
1258''')
1259
1260    def test_long_line_after_append(self):
1261        eq = self.ndiffAssertEqual
1262        s = 'This is an example of string which has almost the limit of header length.'
1263        h = Header(s)
1264        h.append('Add another line.')
1265        eq(h.encode(maxlinelen=76), """\
1266This is an example of string which has almost the limit of header length.
1267 Add another line.""")
1268
1269    def test_shorter_line_with_append(self):
1270        eq = self.ndiffAssertEqual
1271        s = 'This is a shorter line.'
1272        h = Header(s)
1273        h.append('Add another sentence. (Surprise?)')
1274        eq(h.encode(),
1275           'This is a shorter line. Add another sentence. (Surprise?)')
1276
1277    def test_long_field_name(self):
1278        eq = self.ndiffAssertEqual
1279        fn = 'X-Very-Very-Very-Long-Header-Name'
1280        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1281              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1282              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1283              'bef\xf6rdert. ')
1284        h = Header(gs, 'iso-8859-1', header_name=fn)
1285        # BAW: this seems broken because the first line is too long
1286        eq(h.encode(maxlinelen=76), """\
1287=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1288 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1289 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1290 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
1291
1292    def test_long_received_header(self):
1293        h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1294             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1295             'Wed, 05 Mar 2003 18:10:18 -0700')
1296        msg = Message()
1297        msg['Received-1'] = Header(h, continuation_ws='\t')
1298        msg['Received-2'] = h
1299        # This should be splitting on spaces not semicolons.
1300        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
1301Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1302 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
1303 Wed, 05 Mar 2003 18:10:18 -0700
1304Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1305 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
1306 Wed, 05 Mar 2003 18:10:18 -0700
1307
1308""")
1309
1310    def test_string_headerinst_eq(self):
1311        h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1312             'tu-muenchen.de> (David Bremner\'s message of '
1313             '"Thu, 6 Mar 2003 13:58:21 +0100")')
1314        msg = Message()
1315        msg['Received-1'] = Header(h, header_name='Received-1',
1316                                   continuation_ws='\t')
1317        msg['Received-2'] = h
1318        # XXX The space after the ':' should not be there.
1319        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
1320Received-1:\x20
1321 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1322 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1323Received-2:\x20
1324 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1325 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1326
1327""")
1328
1329    def test_long_unbreakable_lines_with_continuation(self):
1330        eq = self.ndiffAssertEqual
1331        msg = Message()
1332        t = """\
1333iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1334 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1335        msg['Face-1'] = t
1336        msg['Face-2'] = Header(t, header_name='Face-2')
1337        msg['Face-3'] = ' ' + t
1338        # XXX This splitting is all wrong.  It the first value line should be
1339        # snug against the field name or the space after the header not there.
1340        eq(msg.as_string(maxheaderlen=78), """\
1341Face-1:\x20
1342 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1343 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1344Face-2:\x20
1345 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1346 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1347Face-3:\x20
1348 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1349 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1350
1351""")
1352
1353    def test_another_long_multiline_header(self):
1354        eq = self.ndiffAssertEqual
1355        m = ('Received: from siimage.com '
1356             '([172.25.1.3]) by zima.siliconimage.com with '
1357             'Microsoft SMTPSVC(5.0.2195.4905); '
1358             'Wed, 16 Oct 2002 07:41:11 -0700')
1359        msg = email.message_from_string(m)
1360        eq(msg.as_string(maxheaderlen=78), '''\
1361Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1362 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
1363
1364''')
1365
1366    def test_long_lines_with_different_header(self):
1367        eq = self.ndiffAssertEqual
1368        h = ('List-Unsubscribe: '
1369             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1370             '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
1371             '?subject=unsubscribe>')
1372        msg = Message()
1373        msg['List'] = h
1374        msg['List'] = Header(h, header_name='List')
1375        eq(msg.as_string(maxheaderlen=78), """\
1376List: List-Unsubscribe:
1377 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
1378        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
1379List: List-Unsubscribe:
1380 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
1381        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
1382
1383""")
1384
1385    def test_long_rfc2047_header_with_embedded_fws(self):
1386        h = Header(textwrap.dedent("""\
1387            We're going to pretend this header is in a non-ascii character set
1388            \tto see if line wrapping with encoded words and embedded
1389               folding white space works"""),
1390                   charset='utf-8',
1391                   header_name='Test')
1392        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1393            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1394             =?utf-8?q?cter_set?=
1395             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1396             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1397
1398
1399
1400# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # Fixture: a one-header message whose body starts with "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten_fixture(self, mangle_from_):
        # Serialize the fixture message with the requested mangle_from_
        # setting and return the resulting text.
        out = StringIO()
        Generator(out, mangle_from_=mangle_from_).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body's "From " line gains a '>' prefix.
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(self._flatten_fixture(True), expected)

    def test_dont_mangle_from(self):
        # With mangling off, the body is written out untouched.
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(self._flatten_fixture(False), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # "From " lines placed before the first boundary and after the
        # closing boundary must both be mangled.
        source = textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """)
        parsed = email.message_from_string(source)
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(parsed)
        mangled_count = sum(1 for line in out.getvalue().split('\n')
                            if line.startswith('>From '))
        self.assertEqual(mangled_count, 2)

    def test_mangled_from_with_bad_bytes(self):
        # A "From " body line containing non-ASCII bytes is mangled by
        # BytesGenerator with the bytes themselves left intact.
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

        """).encode('utf-8')
        parsed = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), source + b'>From R\xc3\xb6lli\n')

    def test_mutltipart_with_bad_bytes_in_cte(self):
        # bpo30835
        # There is no assertion here: the test passes as long as parsing a
        # multipart whose Content-Transfer-Encoding value is non-ASCII
        # does not raise.
        source = textwrap.dedent("""\
            From: aperson@example.com
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        email.message_from_bytes(source)
1477
1478
1479# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Read the sample file once and wrap its raw bytes in a MIMEAudio.
        with openfile('audiotest.au', 'rb') as stream:
            raw = stream.read()
        self._audiodata = raw
        self._au = MIMEAudio(raw)

    def test_guess_minor_type(self):
        # No subtype was passed, so it is derived from the data.
        content_type = self._au.get_content_type()
        self.assertEqual(content_type, 'audio/basic')

    def test_encoding(self):
        # The stored payload is base64 text that decodes to the input bytes.
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        part = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(part.get_content_type(), 'audio/fish')

    def test_add_header(self):
        part = self._au
        sentinel = []
        part.add_header('Content-Disposition', 'attachment',
                        filename='audiotest.au')
        self.assertEqual(part['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(part.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            part.get_param('filename', header='content-disposition'),
            'audiotest.au')
        self.assertEqual(
            part.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            part.get_param('foo', failobj=sentinel,
                           header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1516
1517
1518
1519# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the sample GIF once and wrap its raw bytes in a MIMEImage.
        with openfile('PyBanner048.gif', 'rb') as stream:
            raw = stream.read()
        self._imgdata = raw
        self._im = MIMEImage(raw)

    def test_guess_minor_type(self):
        # No subtype was passed, so it is derived from the data.
        content_type = self._im.get_content_type()
        self.assertEqual(content_type, 'image/gif')

    def test_encoding(self):
        # The stored payload is base64 text that decodes to the input bytes.
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        part = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(part.get_content_type(), 'image/fish')

    def test_add_header(self):
        part = self._im
        sentinel = []
        part.add_header('Content-Disposition', 'attachment',
                        filename='dingusfish.gif')
        self.assertEqual(part['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(part.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            part.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        self.assertEqual(
            part.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            part.get_param('foo', failobj=sentinel,
                           header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1556
1557
1558
1559# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def _roundtrip(self, msg):
        # Flatten msg with BytesGenerator and parse the resulting wire form
        # back into a new message object.
        buf = BytesIO()
        BytesGenerator(buf).flatten(msg)
        return email.message_from_bytes(buf.getvalue())

    def test_headers(self):
        # Default subtype is octet-stream and the default encoder is base64.
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        # The message parsed back from the wire form must look the same.
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        # The message parsed back from the wire form must look the same.
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        # The leading \x0b byte must survive the round trip intact.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        # The message parsed back from the wire form must look the same.
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        # The message parsed back from the wire form must look the same.
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1649
1650
1651# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # Plain text part built with every default left in place.
        self._msg = MIMEText('hello there')

    def test_types(self):
        part = self._msg
        sentinel = []
        self.assertEqual(part.get_content_type(), 'text/plain')
        self.assertEqual(part.get_param('charset'), 'us-ascii')
        # Unknown parameters and unknown headers hand back the failobj.
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(part.get_param('charset', sentinel, header='foobar'),
                      sentinel)

    def test_payload(self):
        part = self._msg
        self.assertEqual(part.get_payload(), 'hello there')
        self.assertFalse(part.is_multipart())

    def test_charset(self):
        ascii_part = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(ascii_part.get_charset().input_charset, 'us-ascii')
        self.assertEqual(ascii_part['content-type'],
                         'text/plain; charset="us-ascii"')
        # Also accept a Charset instance
        utf8 = Charset('utf-8')
        utf8.body_encoding = None
        utf8_part = MIMEText('hello there', _charset=utf8)
        self.assertEqual(utf8_part.get_charset().input_charset, 'utf-8')
        self.assertEqual(utf8_part['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(utf8_part.get_payload(), 'hello there')

    def test_7bit_input(self):
        part = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(part.get_charset().input_charset, 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        # Without an explicit charset, ASCII-only text is tagged us-ascii.
        part = MIMEText('hello there')
        self.assertEqual(part.get_charset(), 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', part.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        part = MIMEText(teststr, _charset='utf-8')
        self.assertEqual(part.get_charset().output_charset, 'utf-8')
        self.assertEqual(part['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(part.get_payload(decode=True),
                         teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        with self.assertRaises(UnicodeEncodeError):
            MIMEText(teststr)
1708
1709
1710
1711# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a text part followed by
        # a GIF attachment; test_hierarchy inspects this structure.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        # Render a fixed timestamp as a local-time RFC 2822 date.  Use
        # formatdate() rather than formatting time.timezone by hand: it
        # yields a correctly signed +/-HHMM offset for zones east of UTC and
        # for offsets that include a minutes component.
        now = 987809702.54848599
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The subparts are the very objects attached in setUp, in order.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and regenerating this text must reproduce it exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--
''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble adds one extra blank line before the
        # first boundary (compare with the None-preamble test below).
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # A '\n' epilogue shows up as one blank line after the end boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        # Each alternative is a message/external-body wrapping one
        # text/plain message.
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type is not acted on: the
        # body, boundary lines included, is regenerated verbatim.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="    XXXX"

--    XXXX
Content-Type: text/plain


--    XXXX
Content-Type: text/plain

--    XXXX--
''')
        self.assertTrue(msg.is_multipart())
        # The leading spaces are part of the boundary value.
        eq(msg.get_boundary(), '    XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The input ends right after the closing boundary, with no newline.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

    def test_mimebase_default_policy(self):
        m = MIMEBase('multipart', 'mixed')
        self.assertIs(m.policy, email.policy.compat32)

    def test_mimebase_custom_policy(self):
        m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
        self.assertIs(m.policy, email.policy.default)
2095
2096# Test some badly formatted messages
class TestNonConformant(TestEmailBase):

    def test_parse_missing_minor_type(self):
        # msg_14.txt must be reported as text/plain.
        parsed = self._msgobj('msg_14.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        outer = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        first_part = outer.get_payload(0)
        self.assertTrue(hasattr(first_part, 'defects'))
        self.assertEqual(len(first_part.defects), 1)
        self.assertIsInstance(first_part.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        # The payload stays a plain string and two defects are recorded.
        parsed = self._msgobj('msg_25.txt')
        self.assertIsInstance(parsed.get_payload(), str)
        found = parsed.defects
        self.assertEqual(len(found), 2)
        self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(found[1],
                              errors.MultipartInvariantViolationDefect)

    # Template for the Content-Transfer-Encoding tests below: the "{}" right
    # after the boundary parameter is filled in with an extra header line,
    # or with nothing at all.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        source = self.multipart_msg.format(
            "\nContent-Transfer-Encoding: base64")
        parsed = self._str_msg(source)
        self.assertEqual(len(parsed.defects), 1)
        self.assertIsInstance(
            parsed.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        source = self.multipart_msg.format('')
        parsed = self._str_msg(source)
        self.assertEqual(len(parsed.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # These three encodings must not register a defect on a multipart.
        for cte in ('7bit', '8bit', 'BINary'):
            extra_header = "\nContent-Transfer-Encoding: {}".format(cte)
            parsed = self._str_msg(self.multipart_msg.format(extra_header))
            self.assertEqual(len(parsed.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        message = Message()

        def check_reported_as_text_plain():
            self.assertEqual(message.get_content_maintype(), 'text')
            self.assertEqual(message.get_content_subtype(), 'plain')
            self.assertEqual(message.get_content_type(), 'text/plain')

        # RFC 2045, $5.2 says invalid yields text/plain
        message['Content-Type'] = 'text'
        check_reported_as_text_plain()
        # Clear the old value and try something /really/ invalid
        del message['content-type']
        message['Content-Type'] = 'foo'
        check_reported_as_text_plain()
        # Still, make sure that the message is idempotently generated
        sink = StringIO()
        Generator(sink).flatten(message)
        self.ndiffAssertEqual(sink.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body, boundary lines included, is returned as one
        # string payload.
        parsed = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(parsed.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        parsed = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(parsed.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        parsed = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(parsed, 'defects'))
        found = parsed.defects
        self.assertEqual(len(found), 2)
        self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(found[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        embedded = outer.get_payload(1)
        bad = embedded.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        # The leading continuation line is dropped from the headers and
        # recorded as a defect that carries the offending line.
        source = ' Line 1\nSubject: test\n\nbody'
        parsed = email.message_from_string(source)
        self.assertEqual(parsed.keys(), ['Subject'])
        self.assertEqual(parsed.get_payload(), 'body')
        self.assertEqual(len(parsed.defects), 1)
        self.assertDefectsEqual(parsed.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        self.assertEqual(parsed.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        source = 'Subject: test\nnot a header\nTo: abc\n\nb\n'
        parsed = self._str_msg(source)
        self.assertEqual(parsed.keys(), ['Subject'])
        self.assertEqual(parsed.get_payload(),
                         'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(parsed.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2269
2270
2271# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Check decode_header()/make_header() on RFC 2047 encoded words."""

    def test_rfc2047_multiline(self):
        # Encoded words on a folded header decode into alternating plain and
        # charset-tagged chunks; re-encoding is checked with maxlinelen=76.
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        # The space between the encoded word and the plain text that
        # follows it must survive the decode/make_header round trip.
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        # Same as above, with plain text on both sides of encoded words.
        eq = self.assertEqual
        s = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
             '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued directly to plain text are still recognised.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        # With separating spaces, the spaces stay in the plain chunks.
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        # Base64 encoded words with missing or partial '=' padding must
        # still decode.  Only inputs that decode to complete bytes are used.
        s = '=?iso-8859-1?B?%s?='
        data = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for q, a in data:
            # One sub-test per payload: a failure names the offending input
            # and the remaining inputs are still exercised.
            with self.subTest(encoded=q):
                dh = decode_header(s % q)
                self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004: an '=' followed by non-hex digits is kept literally.
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        # Re-encoding reproduces the input, except in lower case.
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        # An encoded display name followed by a folded address line.
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        # Re-encoding yields the input with the line break removed.
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
2398
2399
2400# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage wrappers and multipart generation/parsing."""

    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance.
        with openfile('msg_11.txt') as sample:
            self._text = sample.read()

    def test_type_error(self):
        # A plain str is not an acceptable argument for MIMEMessage.
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        title = 'A sub-message'
        inner = Message()
        inner['Subject'] = title
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        parts = wrapper.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        # The payload holds the very same object that was passed in.
        only = parts[0]
        self.assertIs(only, inner)
        self.assertEqual(only['subject'], title)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must be refused.
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        with self.assertRaises(errors.MultipartConversionError):
            wrapper.attach(second)

    def test_generate(self):
        # Build the message to be enclosed, wrap it, then flatten the pair.
        inner = Message()
        inner['Subject'] = 'An enclosed message'
        inner.set_payload('Here is the body of the message.\n')
        outer = MIMEMessage(inner)
        outer['Subject'] = 'The enclosing message'
        buf = StringIO()
        Generator(buf).flatten(outer)
        self.assertEqual(buf.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        parsed = self._msgobj('msg_11.txt')
        self.assertEqual(parsed.get_content_type(), 'message/rfc822')
        parts = parsed.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed['subject'], 'An enclosed message')
        self.assertEqual(enclosed.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        check = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        report = self._msgobj('msg_16.txt')
        check(report.get_content_type(), 'multipart/report')
        self.assertTrue(report.is_multipart())
        check(len(report.get_payload()), 3)
        # Part 1: the human readable text/plain section.
        human = report.get_payload(0)
        check(human.get_content_type(), 'text/plain')
        check(human.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Part 2: the machine parsable DSN information, made of two blocks
        # of headers that are each represented by a nested Message object.
        status = report.get_payload(1)
        check(status.get_content_type(), 'message/delivery-status')
        check(len(status.get_payload()), 2)
        first_block = status.get_payload(0)
        self.assertIsInstance(first_block, Message)
        check(first_block['original-envelope-id'],
              '0GK500B4HD0888@cougar.noc.ucla.edu')
        check(first_block.get_param('dns', header='reporting-mta'), '')
        # A parameter that is not there comes back as None.
        check(first_block.get_param('nsd', header='reporting-mta'), None)
        second_block = status.get_payload(1)
        self.assertIsInstance(second_block, Message)
        check(second_block['action'], 'failed')
        check(second_block.get_params(header='original-recipient'),
              [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        check(second_block.get_param('rfc822', header='final-recipient'), '')
        # Part 3: the original message.
        original = report.get_payload(2)
        check(original.get_content_type(), 'message/rfc822')
        wrapped = original.get_payload()
        self.assertIsInstance(wrapped, list)
        check(len(wrapped), 1)
        innermost = wrapped[0]
        self.assertIsInstance(innermost, Message)
        check(innermost.get_content_type(), 'text/plain')
        check(innermost['message-id'],
              '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten to
        # exactly the text stored in msg_21.txt.
        with openfile('msg_21.txt') as fp:
            expected = fp.read()
        outer = Message()
        outer['From'] = 'aperson@dom.ain'
        outer['To'] = 'bperson@dom.ain'
        outer['Subject'] = 'Test'
        outer.preamble = 'MIME message'
        outer.epilogue = 'End of MIME message\n'
        part_one = MIMEText('One')
        part_two = MIMEText('Two')
        outer.add_header(
            'Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        outer.attach(part_one)
        outer.attach(part_two)
        out = StringIO()
        Generator(out).flatten(outer)
        self.ndiffAssertEqual(out.getvalue(), expected)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        outer = Message()
        outer['From'] = 'aperson@dom.ain'
        outer['To'] = 'bperson@dom.ain'
        outer['Subject'] = 'Test'
        outer.preamble = 'MIME message'
        outer.epilogue = ''
        part_one = MIMEText('One')
        part_two = MIMEText('Two')
        outer.add_header(
            'Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        outer.attach(part_one)
        outer.attach(part_two)
        self.ndiffAssertEqual(outer.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        with openfile('msg_30.txt') as fp:
            parsed = email.message_from_file(fp)
        # Both top-level parts report message/rfc822 ...
        for index in (0, 1):
            container = parsed.get_payload(index)
            self.assertEqual(container.get_default_type(), 'message/rfc822')
            self.assertEqual(container.get_content_type(), 'message/rfc822')
        # ... and the message inside each of them reports text/plain.
        for index in (0, 1):
            enclosed = parsed.get_payload(index).get_payload(0)
            self.assertEqual(enclosed.get_default_type(), 'text/plain')
            self.assertEqual(enclosed.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        with openfile('msg_28.txt') as fp:
            parsed = email.message_from_file(fp)
        # Same expectations as test_default_type, on a different sample.
        for index in (0, 1):
            container = parsed.get_payload(index)
            self.assertEqual(container.get_default_type(), 'message/rfc822')
            self.assertEqual(container.get_content_type(), 'message/rfc822')
        for index in (0, 1):
            enclosed = parsed.get_payload(index).get_payload(0)
            self.assertEqual(enclosed.get_default_type(), 'text/plain')
            self.assertEqual(enclosed.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        check = self.assertEqual
        diff_check = self.ndiffAssertEqual
        # A multipart/digest container built by hand ...
        digest = MIMEMultipart('digest', 'BOUNDARY')
        digest.epilogue = ''
        # ... holding two text messages, each wrapped in a MIMEMessage.
        wrapped = [MIMEMessage(MIMEText('message 1\n')),
                   MIMEMessage(MIMEText('message 2\n'))]
        for part in wrapped:
            digest.attach(part)
        for part in wrapped:
            check(part.get_content_type(), 'message/rfc822')
            check(part.get_default_type(), 'message/rfc822')
        diff_check(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Drop the explicit headers from the wrappers: the reported types
        # must not change, only the generated text.
        for part in wrapped:
            del part['content-type']
            del part['mime-version']
        for part in wrapped:
            check(part.get_content_type(), 'message/rfc822')
            check(part.get_default_type(), 'message/rfc822')
        diff_check(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts given through _subparts become the payload, in order.
        first = MIMEText('')
        second = MIMEText('')
        container = MIMEMultipart(_subparts=(first, second))
        self.assertEqual(len(container.get_payload()), 2)
        self.assertEqual(container.get_payload(0), first)
        self.assertEqual(container.get_payload(1), second)

    def test_default_multipart_constructor(self):
        container = MIMEMultipart()
        self.assertTrue(container.is_multipart())

    def test_multipart_default_policy(self):
        # Without an explicit policy a second 'To' is simply appended.
        container = MIMEMultipart()
        container['To'] = 'a@b.com'
        container['To'] = 'c@d.com'
        self.assertEqual(container.get_all('to'), ['a@b.com', 'c@d.com'])

    def test_multipart_custom_policy(self):
        # With email.policy.default a second 'To' raises ValueError.
        container = MIMEMultipart(policy=email.policy.default)
        container['To'] = 'a@b.com'
        with self.assertRaises(ValueError) as caught:
            container['To'] = 'c@d.com'
        self.assertEqual(str(caught.exception),
                         'There may be at most 1 To headers in a message')
2709
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Round-trip sample messages through the parser and the generator."""

    linesep = '\n'

    def _msgobj(self, filename):
        # Hand back the parsed message together with the raw source text.
        with openfile(filename) as fp:
            raw = fp.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text, unixfrom=False):
        # Regenerate msg with maxheaderlen=0 and diff the result against
        # the text it was parsed from.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        msg, text = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        self.assertEqual(msg.preamble, None)
        self.assertEqual(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_params(), None)
        self.assertEqual(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This sample is regenerated with its Unix-From line.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        check = self.assertEqual
        nl = self.linesep
        report, _ = self._msgobj('msg_05.txt')
        check(report.get_content_type(), 'multipart/report')
        # Inspect the Content-Type: parameters by name.
        params = dict(report.get_params())
        check(params['report-type'], 'delivery-status')
        check(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        check(report.preamble, 'This is a MIME-encapsulated message.' + nl)
        check(report.epilogue, nl)
        check(len(report.get_payload()), 3)
        # The first two subparts are plain text with the same body.
        first = report.get_payload(0)
        check(first.get_content_type(), 'text/plain')
        check(first.get_payload(), 'Yadda yadda yadda' + nl)
        second = report.get_payload(1)
        check(second.get_content_type(), 'text/plain')
        check(second.get_payload(), 'Yadda yadda yadda' + nl)
        # The third subpart wraps exactly one further Message.
        third = report.get_payload(2)
        check(third.get_content_type(), 'message/rfc822')
        self.assertIsInstance(third, Message)
        wrapped = third.get_payload()
        self.assertIsInstance(wrapped, list)
        check(len(wrapped), 1)
        innermost = wrapped[0]
        self.assertIsInstance(innermost, Message)
        check(innermost.get_payload(), 'Yadda yadda yadda' + nl)

    def test_parser(self):
        outer, _ = self._msgobj('msg_06.txt')
        # Check the outer content type first.
        self.assertEqual(outer.get_content_type(), 'message/rfc822')
        # The payload must be a list holding exactly one sub-Message, and
        # that sub-message must be text/plain with a str payload.
        parts = outer.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        self.assertIsInstance(enclosed.get_payload(), str)
        self.assertEqual(enclosed.get_payload(), self.linesep)
2870
2871
2872
2873# Test various other bits of the package's functionality
2874class TestMiscellaneous(TestEmailBase):
2875    def test_message_from_string(self):
2876        with openfile('msg_01.txt') as fp:
2877            text = fp.read()
2878        msg = email.message_from_string(text)
2879        s = StringIO()
2880        # Don't wrap/continue long headers since we're trying to test
2881        # idempotency.
2882        g = Generator(s, maxheaderlen=0)
2883        g.flatten(msg)
2884        self.assertEqual(text, s.getvalue())
2885
2886    def test_message_from_file(self):
2887        with openfile('msg_01.txt') as fp:
2888            text = fp.read()
2889            fp.seek(0)
2890            msg = email.message_from_file(fp)
2891            s = StringIO()
2892            # Don't wrap/continue long headers since we're trying to test
2893            # idempotency.
2894            g = Generator(s, maxheaderlen=0)
2895            g.flatten(msg)
2896            self.assertEqual(text, s.getvalue())
2897
2898    def test_message_from_string_with_class(self):
2899        with openfile('msg_01.txt') as fp:
2900            text = fp.read()
2901
2902        # Create a subclass
2903        class MyMessage(Message):
2904            pass
2905
2906        msg = email.message_from_string(text, MyMessage)
2907        self.assertIsInstance(msg, MyMessage)
2908        # Try something more complicated
2909        with openfile('msg_02.txt') as fp:
2910            text = fp.read()
2911        msg = email.message_from_string(text, MyMessage)
2912        for subpart in msg.walk():
2913            self.assertIsInstance(subpart, MyMessage)
2914
2915    def test_message_from_file_with_class(self):
2916        # Create a subclass
2917        class MyMessage(Message):
2918            pass
2919
2920        with openfile('msg_01.txt') as fp:
2921            msg = email.message_from_file(fp, MyMessage)
2922        self.assertIsInstance(msg, MyMessage)
2923        # Try something more complicated
2924        with openfile('msg_02.txt') as fp:
2925            msg = email.message_from_file(fp, MyMessage)
2926        for subpart in msg.walk():
2927            self.assertIsInstance(subpart, MyMessage)
2928
2929    def test_custom_message_does_not_require_arguments(self):
2930        class MyMessage(Message):
2931            def __init__(self):
2932                super().__init__()
2933        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2934        self.assertIsInstance(msg, MyMessage)
2935
2936    def test__all__(self):
2937        module = __import__('email')
2938        self.assertEqual(sorted(module.__all__), [
2939            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2940            'generator', 'header', 'iterators', 'message',
2941            'message_from_binary_file', 'message_from_bytes',
2942            'message_from_file', 'message_from_string', 'mime', 'parser',
2943            'quoprimime', 'utils',
2944            ])
2945
2946    def test_formatdate(self):
2947        now = time.time()
2948        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2949                         time.gmtime(now)[:6])
2950
2951    def test_formatdate_localtime(self):
2952        now = time.time()
2953        self.assertEqual(
2954            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2955            time.localtime(now)[:6])
2956
2957    def test_formatdate_usegmt(self):
2958        now = time.time()
2959        self.assertEqual(
2960            utils.formatdate(now, localtime=False),
2961            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2962        self.assertEqual(
2963            utils.formatdate(now, localtime=False, usegmt=True),
2964            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2965
2966    # parsedate and parsedate_tz will become deprecated interfaces someday
2967    def test_parsedate_returns_None_for_invalid_strings(self):
2968        self.assertIsNone(utils.parsedate(''))
2969        self.assertIsNone(utils.parsedate_tz(''))
2970        self.assertIsNone(utils.parsedate('0'))
2971        self.assertIsNone(utils.parsedate_tz('0'))
2972        self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2973        self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2974        # Not a part of the spec but, but this has historically worked:
2975        self.assertIsNone(utils.parsedate(None))
2976        self.assertIsNone(utils.parsedate_tz(None))
2977
2978    def test_parsedate_compact(self):
2979        # The FWS after the comma is optional
2980        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2981                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2982
2983    def test_parsedate_no_dayofweek(self):
2984        eq = self.assertEqual
2985        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2986           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2987
2988    def test_parsedate_compact_no_dayofweek(self):
2989        eq = self.assertEqual
2990        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2991           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2992
2993    def test_parsedate_no_space_before_positive_offset(self):
2994        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2995           (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2996
2997    def test_parsedate_no_space_before_negative_offset(self):
2998        # Issue 1155362: we already handled '+' for this case.
2999        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
3000           (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
3001
3002
3003    def test_parsedate_accepts_time_with_dots(self):
3004        eq = self.assertEqual
3005        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
3006           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3007        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
3008           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
3009
3010    def test_parsedate_acceptable_to_time_functions(self):
3011        eq = self.assertEqual
3012        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
3013        t = int(time.mktime(timetup))
3014        eq(time.localtime(t)[:6], timetup[:6])
3015        eq(int(time.strftime('%Y', timetup)), 2003)
3016        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
3017        t = int(time.mktime(timetup[:9]))
3018        eq(time.localtime(t)[:6], timetup[:6])
3019        eq(int(time.strftime('%Y', timetup[:9])), 2003)
3020
3021    def test_mktime_tz(self):
3022        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3023                                          -1, -1, -1, 0)), 0)
3024        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3025                                          -1, -1, -1, 1234)), -1234)
3026
3027    def test_parsedate_y2k(self):
3028        """Test for parsing a date with a two-digit year.
3029
3030        Parsing a date with a two-digit year should return the correct
3031        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
3032        obsoletes RFC822) requires four-digit years.
3033
3034        """
3035        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
3036                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
3037        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
3038                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
3039
3040    def test_parseaddr_empty(self):
3041        self.assertEqual(utils.parseaddr('<>'), ('', ''))
3042        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
3043
3044    def test_parseaddr_multiple_domains(self):
3045        self.assertEqual(
3046            utils.parseaddr('a@b@c'),
3047            ('', '')
3048        )
3049        self.assertEqual(
3050            utils.parseaddr('a@b.c@c'),
3051            ('', '')
3052        )
3053        self.assertEqual(
3054            utils.parseaddr('a@172.17.0.1@c'),
3055            ('', '')
3056        )
3057
3058    def test_noquote_dump(self):
3059        self.assertEqual(
3060            utils.formataddr(('A Silly Person', 'person@dom.ain')),
3061            'A Silly Person <person@dom.ain>')
3062
3063    def test_escape_dump(self):
3064        self.assertEqual(
3065            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
3066            r'"A (Very) Silly Person" <person@dom.ain>')
3067        self.assertEqual(
3068            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
3069            ('A (Very) Silly Person', 'person@dom.ain'))
3070        a = r'A \(Special\) Person'
3071        b = 'person@dom.ain'
3072        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3073
3074    def test_escape_backslashes(self):
3075        self.assertEqual(
3076            utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
3077            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
3078        a = r'Arthur \Backslash\ Foobar'
3079        b = 'person@dom.ain'
3080        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3081
3082    def test_quotes_unicode_names(self):
3083        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3084        name = "H\u00e4ns W\u00fcrst"
3085        addr = 'person@dom.ain'
3086        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3087        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
3088        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
3089        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
3090            latin1_quopri)
3091
3092    def test_accepts_any_charset_like_object(self):
3093        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3094        name = "H\u00e4ns W\u00fcrst"
3095        addr = 'person@dom.ain'
3096        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3097        foobar = "FOOBAR"
3098        class CharsetMock:
3099            def header_encode(self, string):
3100                return foobar
3101        mock = CharsetMock()
3102        mock_expected = "%s <%s>" % (foobar, addr)
3103        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
3104        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
3105            utf8_base64)
3106
3107    def test_invalid_charset_like_object_raises_error(self):
3108        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3109        name = "H\u00e4ns W\u00fcrst"
3110        addr = 'person@dom.ain'
3111        # An object without a header_encode method:
3112        bad_charset = object()
3113        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
3114            bad_charset)
3115
3116    def test_unicode_address_raises_error(self):
3117        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3118        addr = 'pers\u00f6n@dom.in'
3119        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
3120        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
3121
3122    def test_name_with_dot(self):
3123        x = 'John X. Doe <jxd@example.com>'
3124        y = '"John X. Doe" <jxd@example.com>'
3125        a, b = ('John X. Doe', 'jxd@example.com')
3126        self.assertEqual(utils.parseaddr(x), (a, b))
3127        self.assertEqual(utils.parseaddr(y), (a, b))
3128        # formataddr() quotes the name if there's a dot in it
3129        self.assertEqual(utils.formataddr((a, b)), y)
3130
3131    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3132        # issue 10005.  Note that in the third test the second pair of
3133        # backslashes is not actually a quoted pair because it is not inside a
3134        # comment or quoted string: the address being parsed has a quoted
3135        # string containing a quoted backslash, followed by 'example' and two
3136        # backslashes, followed by another quoted string containing a space and
3137        # the word 'example'.  parseaddr copies those two backslashes
3138        # literally.  Per rfc5322 this is not technically correct since a \ may
3139        # not appear in an address outside of a quoted string.  It is probably
3140        # a sensible Postel interpretation, though.
3141        eq = self.assertEqual
3142        eq(utils.parseaddr('""example" example"@example.com'),
3143          ('', '""example" example"@example.com'))
3144        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3145          ('', '"\\"example\\" example"@example.com'))
3146        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3147          ('', '"\\\\"example\\\\" example"@example.com'))
3148
3149    def test_parseaddr_preserves_spaces_in_local_part(self):
3150        # issue 9286.  A normal RFC5322 local part should not contain any
3151        # folding white space, but legacy local parts can (they are a sequence
3152        # of atoms, not dotatoms).  On the other hand we strip whitespace from
3153        # before the @ and around dots, on the assumption that the whitespace
3154        # around the punctuation is a mistake in what would otherwise be
3155        # an RFC5322 local part.  Leading whitespace is, usual, stripped as well.
3156        self.assertEqual(('', "merwok wok@xample.com"),
3157            utils.parseaddr("merwok wok@xample.com"))
3158        self.assertEqual(('', "merwok  wok@xample.com"),
3159            utils.parseaddr("merwok  wok@xample.com"))
3160        self.assertEqual(('', "merwok  wok@xample.com"),
3161            utils.parseaddr(" merwok  wok  @xample.com"))
3162        self.assertEqual(('', 'merwok"wok"  wok@xample.com'),
3163            utils.parseaddr('merwok"wok"  wok@xample.com'))
3164        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3165            utils.parseaddr('merwok. wok .  wok@xample.com'))
3166
3167    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3168        addr = ("'foo@example.com' (foo@example.com)",
3169                'foo@example.com')
3170        addrstr = ('"\'foo@example.com\' '
3171                            '(foo@example.com)" <foo@example.com>')
3172        self.assertEqual(utils.parseaddr(addrstr), addr)
3173        self.assertEqual(utils.formataddr(addr), addrstr)
3174
3175
3176    def test_multiline_from_comment(self):
3177        x = """\
3178Foo
3179\tBar <foo@example.com>"""
3180        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3181
3182    def test_quote_dump(self):
3183        self.assertEqual(
3184            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3185            r'"A Silly; Person" <person@dom.ain>')
3186
3187    def test_charset_richcomparisons(self):
3188        eq = self.assertEqual
3189        ne = self.assertNotEqual
3190        cset1 = Charset()
3191        cset2 = Charset()
3192        eq(cset1, 'us-ascii')
3193        eq(cset1, 'US-ASCII')
3194        eq(cset1, 'Us-AsCiI')
3195        eq('us-ascii', cset1)
3196        eq('US-ASCII', cset1)
3197        eq('Us-AsCiI', cset1)
3198        ne(cset1, 'usascii')
3199        ne(cset1, 'USASCII')
3200        ne(cset1, 'UsAsCiI')
3201        ne('usascii', cset1)
3202        ne('USASCII', cset1)
3203        ne('UsAsCiI', cset1)
3204        eq(cset1, cset2)
3205        eq(cset2, cset1)
3206
3207    def test_getaddresses(self):
3208        eq = self.assertEqual
3209        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3210                               'Bud Person <bperson@dom.ain>']),
3211           [('Al Person', 'aperson@dom.ain'),
3212            ('Bud Person', 'bperson@dom.ain')])
3213
3214    def test_getaddresses_nasty(self):
3215        eq = self.assertEqual
3216        eq(utils.getaddresses(['foo: ;']), [('', '')])
3217        eq(utils.getaddresses(
3218           ['[]*-- =~$']),
3219           [('', ''), ('', ''), ('', '*--')])
3220        eq(utils.getaddresses(
3221           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3222           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3223
3224    def test_getaddresses_embedded_comment(self):
3225        """Test proper handling of a nested comment"""
3226        eq = self.assertEqual
3227        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3228        eq(addrs[0][1], 'foo@bar.com')
3229
3230    def test_make_msgid_collisions(self):
3231        # Test make_msgid uniqueness, even with multiple threads
3232        class MsgidsThread(Thread):
3233            def run(self):
3234                # generate msgids for 3 seconds
3235                self.msgids = []
3236                append = self.msgids.append
3237                make_msgid = utils.make_msgid
3238                clock = time.monotonic
3239                tfin = clock() + 3.0
3240                while clock() < tfin:
3241                    append(make_msgid(domain='testdomain-string'))
3242
3243        threads = [MsgidsThread() for i in range(5)]
3244        with start_threads(threads):
3245            pass
3246        all_ids = sum([t.msgids for t in threads], [])
3247        self.assertEqual(len(set(all_ids)), len(all_ids))
3248
3249    def test_utils_quote_unquote(self):
3250        eq = self.assertEqual
3251        msg = Message()
3252        msg.add_header('content-disposition', 'attachment',
3253                       filename='foo\\wacky"name')
3254        eq(msg.get_filename(), 'foo\\wacky"name')
3255
3256    def test_get_body_encoding_with_bogus_charset(self):
3257        charset = Charset('not a charset')
3258        self.assertEqual(charset.get_body_encoding(), 'base64')
3259
3260    def test_get_body_encoding_with_uppercase_charset(self):
3261        eq = self.assertEqual
3262        msg = Message()
3263        msg['Content-Type'] = 'text/plain; charset=UTF-8'
3264        eq(msg['content-type'], 'text/plain; charset=UTF-8')
3265        charsets = msg.get_charsets()
3266        eq(len(charsets), 1)
3267        eq(charsets[0], 'utf-8')
3268        charset = Charset(charsets[0])
3269        eq(charset.get_body_encoding(), 'base64')
3270        msg.set_payload(b'hello world', charset=charset)
3271        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3272        eq(msg.get_payload(decode=True), b'hello world')
3273        eq(msg['content-transfer-encoding'], 'base64')
3274        # Try another one
3275        msg = Message()
3276        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3277        charsets = msg.get_charsets()
3278        eq(len(charsets), 1)
3279        eq(charsets[0], 'us-ascii')
3280        charset = Charset(charsets[0])
3281        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3282        msg.set_payload('hello world', charset=charset)
3283        eq(msg.get_payload(), 'hello world')
3284        eq(msg['content-transfer-encoding'], '7bit')
3285
3286    def test_charsets_case_insensitive(self):
3287        lc = Charset('us-ascii')
3288        uc = Charset('US-ASCII')
3289        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3290
3291    def test_partial_falls_inside_message_delivery_status(self):
3292        eq = self.ndiffAssertEqual
3293        # The Parser interface provides chunks of data to FeedParser in 8192
3294        # byte gulps.  SF bug #1076485 found one of those chunks inside
3295        # message/delivery-status header block, which triggered an
3296        # unreadline() of NeedMoreData.
3297        msg = self._msgobj('msg_43.txt')
3298        sfp = StringIO()
3299        iterators._structure(msg, sfp)
3300        eq(sfp.getvalue(), """\
3301multipart/report
3302    text/plain
3303    message/delivery-status
3304        text/plain
3305        text/plain
3306        text/plain
3307        text/plain
3308        text/plain
3309        text/plain
3310        text/plain
3311        text/plain
3312        text/plain
3313        text/plain
3314        text/plain
3315        text/plain
3316        text/plain
3317        text/plain
3318        text/plain
3319        text/plain
3320        text/plain
3321        text/plain
3322        text/plain
3323        text/plain
3324        text/plain
3325        text/plain
3326        text/plain
3327        text/plain
3328        text/plain
3329        text/plain
3330    text/rfc822-headers
3331""")
3332
3333    def test_make_msgid_domain(self):
3334        self.assertEqual(
3335            email.utils.make_msgid(domain='testdomain-string')[-19:],
3336            '@testdomain-string>')
3337
3338    def test_make_msgid_idstring(self):
3339        self.assertEqual(
3340            email.utils.make_msgid(idstring='test-idstring',
3341                domain='testdomain-string')[-33:],
3342            '.test-idstring@testdomain-string>')
3343
3344    def test_make_msgid_default_domain(self):
3345        self.assertTrue(
3346            email.utils.make_msgid().endswith(
3347                '@' + getfqdn() + '>'))
3348
3349    def test_Generator_linend(self):
3350        # Issue 14645.
3351        with openfile('msg_26.txt', newline='\n') as f:
3352            msgtxt = f.read()
3353        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3354        msg = email.message_from_string(msgtxt)
3355        s = StringIO()
3356        g = email.generator.Generator(s)
3357        g.flatten(msg)
3358        self.assertEqual(s.getvalue(), msgtxt_nl)
3359
3360    def test_BytesGenerator_linend(self):
3361        # Issue 14645.
3362        with openfile('msg_26.txt', newline='\n') as f:
3363            msgtxt = f.read()
3364        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3365        msg = email.message_from_string(msgtxt_nl)
3366        s = BytesIO()
3367        g = email.generator.BytesGenerator(s)
3368        g.flatten(msg, linesep='\r\n')
3369        self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3370
3371    def test_BytesGenerator_linend_with_non_ascii(self):
3372        # Issue 14645.
3373        with openfile('msg_26.txt', 'rb') as f:
3374            msgtxt = f.read()
3375        msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3376        msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3377        msg = email.message_from_bytes(msgtxt_nl)
3378        s = BytesIO()
3379        g = email.generator.BytesGenerator(s)
3380        g.flatten(msg, linesep='\r\n')
3381        self.assertEqual(s.getvalue(), msgtxt)
3382
3383    def test_mime_classes_policy_argument(self):
3384        with openfile('audiotest.au', 'rb') as fp:
3385            audiodata = fp.read()
3386        with openfile('PyBanner048.gif', 'rb') as fp:
3387            bindata = fp.read()
3388        classes = [
3389            (MIMEApplication, ('',)),
3390            (MIMEAudio, (audiodata,)),
3391            (MIMEImage, (bindata,)),
3392            (MIMEMessage, (Message(),)),
3393            (MIMENonMultipart, ('multipart', 'mixed')),
3394            (MIMEText, ('',)),
3395        ]
3396        for cls, constructor in classes:
3397            with self.subTest(cls=cls.__name__, policy='compat32'):
3398                m = cls(*constructor)
3399                self.assertIs(m.policy, email.policy.compat32)
3400            with self.subTest(cls=cls.__name__, policy='default'):
3401                m = cls(*constructor, policy=email.policy.default)
3402                self.assertIs(m.policy, email.policy.default)
3403
3404
3405# Test the iterator/generators
class TestIterators(TestEmailBase):
    def test_body_line_iterator(self):
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        simple_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(simple_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(simple_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        multipart = self._msgobj('msg_02.txt')
        multipart_lines = list(iterators.body_line_iterator(multipart))
        self.assertEqual(len(multipart_lines), 43)
        # msg_19.txt holds the text the joined body lines must equal.
        with openfile('msg_19.txt') as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(multipart_lines),
                                  fp.read())

    def test_typed_subpart_iterator(self):
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads),
                         ('a simple kind of mirror\n'
                          'to reflect upon our own\n') * 2)

    def test_typed_subpart_iterator_default_type(self):
        msg = self._msgobj('msg_03.txt')
        # Same as above, but also filtering on the 'plain' subtype.
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')
        ]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads),
                         '\n'
                         'Hi,\n'
                         '\n'
                         'Do you like this message?\n'
                         '\n'
                         '-Me\n')

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # (chunk pushed, number of complete lines readable right after it)
        pushes = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
        ]
        bsf = BufferedSubFile()
        collected = []
        for chunk, expected_count in pushes:
            bsf.push(chunk)
            # Drain every line that is complete at this point.
            ready = list(iter(bsf.readline, NeedMoreData))
            collected.extend(ready)
            self.assertEqual(expected_count, len(ready))
        self.assertEqual(len(collected), sum(count for _, count in pushes))
        # Nothing may be lost or invented: output re-joins to the input.
        self.assertEqual(''.join([chunk for chunk, _ in pushes]),
                         ''.join(collected))

    def test_push_random(self):
        from email.feedparser import BufferedSubFile, NeedMoreData

        total_chars = 10000
        chunksize = 5
        alphabet = 'abcd \t\r\n'

        # Random text with many line-ending combinations, ending in '\n'
        # so that the last line is complete.
        text = ''.join(choice(alphabet) for _ in range(total_chars)) + '\n'
        expected = text.splitlines(True)

        bsf = BufferedSubFile()
        got = []
        for start in range(0, len(text), chunksize):
            bsf.push(text[start:start + chunksize])
            got.extend(iter(bsf.readline, NeedMoreData))
        # Chunked feeding must split lines exactly like str.splitlines().
        self.assertEqual(got, expected)
3507
3508
class TestFeedParsers(TestEmailBase):

    def parse(self, chunks):
        # Feed each item of `chunks` to a fresh FeedParser, in order, and
        # return the message produced by close().
        parser = FeedParser()
        for piece in chunks:
            parser.feed(piece)
        return parser.close()

    def test_empty_header_name_handled(self):
        # Issue 19996
        # A plain string is passed here, so parse() feeds it to the parser
        # one character at a time.
        msg = self.parse("First: val\n: bad\nSecond: val")
        for name in ('First', 'Second'):
            self.assertEqual(msg[name], 'val')

    def test_newlines(self):
        # (chunks fed, header names expected)
        key_cases = [
            (['a:\nb:\rc:\r\nd:\n'], ['a', 'b', 'c', 'd']),
            (['a:\nb:\rc:\r\nd:'], ['a', 'b', 'c', 'd']),
            (['a:\rb', 'c:\n'], ['a', 'bc']),
            (['a:\r', 'b:\n'], ['a', 'b']),
            (['a:\r', '\nb:\n'], ['a', 'b']),
        ]
        for chunks, expected_keys in key_cases:
            self.assertEqual(self.parse(chunks).keys(), expected_keys)

        # Only CR and LF should break header fields
        item_cases = [
            (['a:\x85b:\u2028c:\n'], [('a', '\x85b:\u2028c:')]),
            (['a:\r', 'b:\x85', 'c:\n'], [('a', ''), ('b', '\x85c:')]),
        ]
        for chunks, expected_items in item_cases:
            self.assertEqual(self.parse(chunks).items(), expected_items)

    def test_long_lines(self):
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        M, N = 1000, 20000
        plain = 'x' * M
        with_nel = plain + '\x85'
        # (header chunk, body chunk repeated N times); the body never
        # contains a CR or LF, so it is one very long line.
        body_cases = [
            ('a:b\n\n', plain),
            ('a:b\r\r', plain),
            ('a:b\r\r', with_nel),
        ]
        for header, chunk in body_cases:
            m = self.parse([header] + [chunk] * N)
            self.assertEqual(m.items(), [('a', 'b')])
            self.assertEqual(m.get_payload(), chunk * N)
        # The same, but the long line is a header value.
        m = self.parse(['a:\r', 'b: '] + [plain] * N)
        self.assertEqual(m.items(), [('a', ''), ('b', plain * N)])
3555
3556
class TestParsers(TestEmailBase):

    # Show complete diffs when an equality assertion fails.
    maxDiff = None

    def test_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        self.assertEqual(msg['from'], 'ppp-request@zzz.org')
        self.assertEqual(msg['to'], 'ppp@zzz.org')
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        # The body was not parsed, so it stays a single string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        self.assertEqual(msg['from'], 'ppp-request@zzz.org')
        self.assertEqual(msg['to'], 'ppp@zzz.org')
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        # The payload is str by default and bytes with decode=True.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_bytes_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'rb') as fp:
            email.parser.BytesParser().parse(fp)
            self.assertFalse(fp.closed)

    def test_bytes_parser_on_exception_does_not_close_file(self):
        with openfile('msg_15.txt', 'rb') as fp:
            strict_parser = email.parser.BytesParser(
                policy=email.policy.strict)
            # Under the strict policy the defect is raised as an exception.
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'r') as fp:
            email.parser.Parser().parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_on_exception_does_not_close_file(self):
        with openfile('msg_15.txt', 'r') as fp:
            strict_parser = email.parser.Parser(policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_whitespace_continuation(self):
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string(
            'From: aperson@dom.ain\n'
            'To: bperson@dom.ain\n'
            'Subject: the next line has a space on it\n'
            '\x20\n'
            'Date: Mon, 8 Apr 2002 15:09:19 -0400\n'
            'Message-ID: spam\n'
            '\n'
            "Here's the message body\n")
        # The space-only line is kept as a continuation of Subject.
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string(
            'From: aperson@dom.ain\n'
            'To: bperson@dom.ain\n'
            'Date: Mon, 8 Apr 2002 15:09:19 -0400\n'
            'Message-ID: spam\n'
            'Subject: the next line has a space on it\n'
            '\x20\n'
            '\n'
            "Here's the message body\n")
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        # newline='\n' keeps the CRLF line endings of the file intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        self.assertEqual(len(msg.get_payload()), 2)
        text_part = msg.get_payload(0)
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        attachment = msg.get_payload(1)
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        out = StringIO()
        Generator(out).flatten(email.message_from_string(text),
                               linesep='\r\n')
        # Flattening with CRLF must reproduce the input exactly.
        self.assertEqual(out.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        # Both message/rfc822 parts have the same shape and differ only in
        # the text they carry.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = msg.get_payload(index)
            eq(wrapper.get_content_type(), 'message/rfc822')
            eq(wrapper.is_multipart(), 1)
            eq(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            eq(inner.is_multipart(), 0)
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # Headers only, and no newline after the last one.
        text = NL.join(['From: Andrew Person <aperson@dom.ain',
                        'Subject: Test',
                        'Date: Tue, 20 Aug 2002 16:43:45 +1000'])
        msg = email.message_from_string(text)
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        raw = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(raw)
        # Neither value may keep a trailing CR or LF.
        self.assertEqual(msg.get('Header'), value1)
        self.assertEqual(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        # Field names made of unusual punctuation must still be accepted.
        raw = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(raw)
        self.assertEqual(len(msg), 3)
        self.assertEqual(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line has a space before any colon, so it is not a
        # header; the message ends up with no headers at all.
        raw = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(raw)
        self.assertEqual(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        # Single-character field names are valid.
        raw = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(raw)
        self.assertEqual(sorted(msg.keys()), ['A', 'B', 'CC'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        raw = ''.join([
            "From: foo@bar.com\n",
            "To: baz\n",
            "Mime-Version: 1.0\n",
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n",
            "\n",
            "--BOUNDARY\n",
            "Content-Type: text/plain\n",
            "\n",
            "body ending with CRLF newline\r\n",
            "\n",
            "--BOUNDARY--\n",
        ])
        msg = email.message_from_string(raw)
        first_part = msg.get_payload(0)
        self.assertTrue(first_part.get_payload().endswith('\r\n'))
3757
3758
3759class Test8BitBytesHandling(TestEmailBase):
3760    # In Python3 all input is string, but that doesn't work if the actual input
3761    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
3762    # decode byte streams using the surrogateescape error handler, and
3763    # reconvert to binary at appropriate places if we detect surrogates.  This
3764    # doesn't allow us to transform headers with 8bit bytes (they get munged),
3765    # but it does allow us to parse and preserve them, and to decode body
3766    # parts that use an 8bit CTE.
3767
3768    bodytest_msg = textwrap.dedent("""\
3769        From: foo@bar.com
3770        To: baz
3771        Mime-Version: 1.0
3772        Content-Type: text/plain; charset={charset}
3773        Content-Transfer-Encoding: {cte}
3774
3775        {bodyline}
3776        """)
3777
3778    def test_known_8bit_CTE(self):
3779        m = self.bodytest_msg.format(charset='utf-8',
3780                                     cte='8bit',
3781                                     bodyline='pöstal').encode('utf-8')
3782        msg = email.message_from_bytes(m)
3783        self.assertEqual(msg.get_payload(), "pöstal\n")
3784        self.assertEqual(msg.get_payload(decode=True),
3785                         "pöstal\n".encode('utf-8'))
3786
3787    def test_unknown_8bit_CTE(self):
3788        m = self.bodytest_msg.format(charset='notavalidcharset',
3789                                     cte='8bit',
3790                                     bodyline='pöstal').encode('utf-8')
3791        msg = email.message_from_bytes(m)
3792        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
3793        self.assertEqual(msg.get_payload(decode=True),
3794                         "pöstal\n".encode('utf-8'))
3795
3796    def test_8bit_in_quopri_body(self):
3797        # This is non-RFC compliant data...without 'decode' the library code
3798        # decodes the body using the charset from the headers, and because the
3799        # source byte really is utf-8 this works.  This is likely to fail
3800        # against real dirty data (ie: produce mojibake), but the data is
3801        # invalid anyway so it is as good a guess as any.  But this means that
3802        # this test just confirms the current behavior; that behavior is not
3803        # necessarily the best possible behavior.  With 'decode' it is
3804        # returning the raw bytes, so that test should be of correct behavior,
3805        # or at least produce the same result that email4 did.
3806        m = self.bodytest_msg.format(charset='utf-8',
3807                                     cte='quoted-printable',
3808                                     bodyline='p=C3=B6stál').encode('utf-8')
3809        msg = email.message_from_bytes(m)
3810        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3811        self.assertEqual(msg.get_payload(decode=True),
3812                         'pöstál\n'.encode('utf-8'))
3813
3814    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3815        # This is similar to the previous test, but proves that if the 8bit
3816        # byte is undecodeable in the specified charset, it gets replaced
3817        # by the unicode 'unknown' character.  Again, this may or may not
3818        # be the ideal behavior.  Note that if decode=False none of the
3819        # decoders will get involved, so this is the only test we need
3820        # for this behavior.
3821        m = self.bodytest_msg.format(charset='ascii',
3822                                     cte='quoted-printable',
3823                                     bodyline='p=C3=B6stál').encode('utf-8')
3824        msg = email.message_from_bytes(m)
3825        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
3826        self.assertEqual(msg.get_payload(decode=True),
3827                        'pöstál\n'.encode('utf-8'))
3828
3829    # test_defect_handling:test_invalid_chars_in_base64_payload
3830    def test_8bit_in_base64_body(self):
3831        # If we get 8bit bytes in a base64 body, we can just ignore them
3832        # as being outside the base64 alphabet and decode anyway.  But
3833        # we register a defect.
3834        m = self.bodytest_msg.format(charset='utf-8',
3835                                     cte='base64',
3836                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
3837        msg = email.message_from_bytes(m)
3838        self.assertEqual(msg.get_payload(decode=True),
3839                         'pöstal'.encode('utf-8'))
3840        self.assertIsInstance(msg.defects[0],
3841                              errors.InvalidBase64CharactersDefect)
3842
3843    def test_8bit_in_uuencode_body(self):
3844        # Sticking an 8bit byte in a uuencode block makes it undecodable by
3845        # normal means, so the block is returned undecoded, but as bytes.
3846        m = self.bodytest_msg.format(charset='utf-8',
3847                                     cte='uuencode',
3848                                     bodyline='<,.V<W1A; á ').encode('utf-8')
3849        msg = email.message_from_bytes(m)
3850        self.assertEqual(msg.get_payload(decode=True),
3851                         '<,.V<W1A; á \n'.encode('utf-8'))
3852
3853
    # Each entry pairs a raw header line as it appears in the source message
    # with a (name, value) tuple; the value is the text expected for that
    # header when the parsed message is printed with str().
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    # The source lines above joined with newlines, followed by a one-line
    # body, all encoded as utf-8 bytes.
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')
3866
3867    def test_get_8bit_header(self):
3868        msg = email.message_from_bytes(self.headertest_msg)
3869        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3870        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
3871
3872    def test_print_8bit_headers(self):
3873        msg = email.message_from_bytes(self.headertest_msg)
3874        self.assertEqual(str(msg),
3875                         textwrap.dedent("""\
3876                            From: {}
3877                            To: {}
3878                            Subject: {}
3879                            From: {}
3880
3881                            Yes, they are flying.
3882                            """).format(*[expected[1] for (_, expected) in
3883                                        self.headertest_headers]))
3884
3885    def test_values_with_8bit_headers(self):
3886        msg = email.message_from_bytes(self.headertest_msg)
3887        self.assertListEqual([str(x) for x in msg.values()],
3888                              ['foo@bar.com',
3889                               'b\uFFFD\uFFFDz',
3890                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3891                                   'coll\uFFFD\uFFFDgue, le pouf '
3892                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3893                                   '\tJean de Baddie',
3894                               "g\uFFFD\uFFFDst"])
3895
3896    def test_items_with_8bit_headers(self):
3897        msg = email.message_from_bytes(self.headertest_msg)
3898        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
3899                              [('From', 'foo@bar.com'),
3900                               ('To', 'b\uFFFD\uFFFDz'),
3901                               ('Subject', 'Maintenant je vous '
3902                                  'pr\uFFFD\uFFFDsente '
3903                                  'mon coll\uFFFD\uFFFDgue, le pouf '
3904                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3905                                  '\tJean de Baddie'),
3906                               ('From', 'g\uFFFD\uFFFDst')])
3907
3908    def test_get_all_with_8bit_headers(self):
3909        msg = email.message_from_bytes(self.headertest_msg)
3910        self.assertListEqual([str(x) for x in msg.get_all('from')],
3911                              ['foo@bar.com',
3912                               'g\uFFFD\uFFFDst'])
3913
3914    def test_get_content_type_with_8bit(self):
3915        msg = email.message_from_bytes(textwrap.dedent("""\
3916            Content-Type: text/pl\xA7in; charset=utf-8
3917            """).encode('latin-1'))
3918        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3919        self.assertEqual(msg.get_content_maintype(), "text")
3920        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3921
3922    # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
3923    def test_get_params_with_8bit(self):
3924        msg = email.message_from_bytes(
3925            'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3926        self.assertEqual(msg.get_params(header='x-header'),
3927           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3928        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3929        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3930        self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3931
3932    # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
3933    def test_get_rfc2231_params_with_8bit(self):
3934        msg = email.message_from_bytes(textwrap.dedent("""\
3935            Content-Type: text/plain; charset=us-ascii;
3936             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3937             ).encode('latin-1'))
3938        self.assertEqual(msg.get_param('title'),
3939            ('us-ascii', 'en', 'This is not f\uFFFDn'))
3940
3941    def test_set_rfc2231_params_with_8bit(self):
3942        msg = email.message_from_bytes(textwrap.dedent("""\
3943            Content-Type: text/plain; charset=us-ascii;
3944             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3945             ).encode('latin-1'))
3946        msg.set_param('title', 'test')
3947        self.assertEqual(msg.get_param('title'), 'test')
3948
3949    def test_del_rfc2231_params_with_8bit(self):
3950        msg = email.message_from_bytes(textwrap.dedent("""\
3951            Content-Type: text/plain; charset=us-ascii;
3952             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3953             ).encode('latin-1'))
3954        msg.del_param('title')
3955        self.assertEqual(msg.get_param('title'), None)
3956        self.assertEqual(msg.get_content_maintype(), 'text')
3957
3958    def test_get_payload_with_8bit_cte_header(self):
3959        msg = email.message_from_bytes(textwrap.dedent("""\
3960            Content-Transfer-Encoding: b\xa7se64
3961            Content-Type: text/plain; charset=latin-1
3962
3963            payload
3964            """).encode('latin-1'))
3965        self.assertEqual(msg.get_payload(), 'payload\n')
3966        self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3967
    # A utf-8 encoded message (bytes) with non-ASCII characters in both
    # the headers and the 8bit body; used as input by the generator tests.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')
3979
3980    def test_bytes_generator(self):
3981        msg = email.message_from_bytes(self.non_latin_bin_msg)
3982        out = BytesIO()
3983        email.generator.BytesGenerator(out).flatten(msg)
3984        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3985
3986    def test_bytes_generator_handles_None_body(self):
3987        #Issue 11019
3988        msg = email.message.Message()
3989        out = BytesIO()
3990        email.generator.BytesGenerator(out).flatten(msg)
3991        self.assertEqual(out.getvalue(), b"\n")
3992
    # Text that Generator is expected to produce for non_latin_bin_msg:
    # 8bit headers become 'unknown-8bit' encoded words (the Subject wrapped
    # over three lines) and the body is base64 encoded.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)
4005
4006    def test_generator_handles_8bit(self):
4007        msg = email.message_from_bytes(self.non_latin_bin_msg)
4008        out = StringIO()
4009        email.generator.Generator(out).flatten(msg)
4010        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
4011
4012    def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
4013        msg = email.message_from_bytes(self.non_latin_bin_msg)
4014        out = BytesIO()
4015        BytesGenerator(out).flatten(msg)
4016        orig_value = out.getvalue()
4017        Generator(StringIO()).flatten(msg) # Should not mutate msg!
4018        out = BytesIO()
4019        BytesGenerator(out).flatten(msg)
4020        self.assertEqual(out.getvalue(), orig_value)
4021
4022    def test_bytes_generator_with_unix_from(self):
4023        # The unixfrom contains a current date, so we can't check it
4024        # literally.  Just make sure the first word is 'From' and the
4025        # rest of the message matches the input.
4026        msg = email.message_from_bytes(self.non_latin_bin_msg)
4027        out = BytesIO()
4028        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
4029        lines = out.getvalue().split(b'\n')
4030        self.assertEqual(lines[0].split()[0], b'From')
4031        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
4032
    # Variant of non_latin_bin_msg_as7bit_wrapped in which the first two
    # lines of the Subject header (list items 2 and 3 after splitting on
    # newlines) are replaced by a single line holding one encoded word.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
4038
4039    def test_message_from_binary_file(self):
4040        fn = 'test.msg'
4041        self.addCleanup(unlink, fn)
4042        with open(fn, 'wb') as testfile:
4043            testfile.write(self.non_latin_bin_msg)
4044        with open(fn, 'rb') as testfile:
4045            m = email.parser.BytesParser().parse(testfile)
4046        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
4047
    # A latin-1 encoded message (bytes) with ASCII-only headers and an 8bit
    # body.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Text that str() of the parsed latin_bin_msg is expected to give: the
    # charset is spelled iso-8859-1 and the body is quoted-printable.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)
4069
4070    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
4071        m = email.message_from_bytes(self.latin_bin_msg)
4072        self.assertEqual(str(m), self.latin_bin_msg_as7bit)
4073
4074    def test_decoded_generator_emits_unicode_body(self):
4075        m = email.message_from_bytes(self.latin_bin_msg)
4076        out = StringIO()
4077        email.generator.DecodedGenerator(out).flatten(m)
4078        #DecodedHeader output contains an extra blank line compared
4079        #to the input message.  RDM: not sure if this is a bug or not,
4080        #but it is not specific to the 8bit->7bit conversion.
4081        self.assertEqual(out.getvalue(),
4082            self.latin_bin_msg.decode('latin-1')+'\n')
4083
4084    def test_bytes_feedparser(self):
4085        bfp = email.feedparser.BytesFeedParser()
4086        for i in range(0, len(self.latin_bin_msg), 10):
4087            bfp.feed(self.latin_bin_msg[i:i+10])
4088        m = bfp.close()
4089        self.assertEqual(str(m), self.latin_bin_msg_as7bit)
4090
4091    def test_crlf_flatten(self):
4092        with openfile('msg_26.txt', 'rb') as fp:
4093            text = fp.read()
4094        msg = email.message_from_bytes(text)
4095        s = BytesIO()
4096        g = email.generator.BytesGenerator(s)
4097        g.flatten(msg, linesep='\r\n')
4098        self.assertEqual(s.getvalue(), text)
4099
4100    def test_8bit_multipart(self):
4101        # Issue 11605
4102        source = textwrap.dedent("""\
4103            Date: Fri, 18 Mar 2011 17:15:43 +0100
4104            To: foo@example.com
4105            From: foodwatch-Newsletter <bar@example.com>
4106            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
4107            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
4108            MIME-Version: 1.0
4109            Content-Type: multipart/alternative;
4110                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
4111
4112            --b1_76a486bee62b0d200f33dc2ca08220ad
4113            Content-Type: text/plain; charset="utf-8"
4114            Content-Transfer-Encoding: 8bit
4115
4116            Guten Tag, ,
4117
4118            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
4119            Nachrichten aus Japan.
4120
4121
4122            --b1_76a486bee62b0d200f33dc2ca08220ad
4123            Content-Type: text/html; charset="utf-8"
4124            Content-Transfer-Encoding: 8bit
4125
4126            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
4127                "http://www.w3.org/TR/html4/loose.dtd">
4128            <html lang="de">
4129            <head>
4130                    <title>foodwatch - Newsletter</title>
4131            </head>
4132            <body>
4133              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
4134                 die Nachrichten aus Japan.</p>
4135            </body>
4136            </html>
4137            --b1_76a486bee62b0d200f33dc2ca08220ad--
4138
4139            """).encode('utf-8')
4140        msg = email.message_from_bytes(source)
4141        s = BytesIO()
4142        g = email.generator.BytesGenerator(s)
4143        g.flatten(msg)
4144        self.assertEqual(s.getvalue(), source)
4145
4146    def test_bytes_generator_b_encoding_linesep(self):
4147        # Issue 14062: b encoding was tacking on an extra \n.
4148        m = Message()
4149        # This has enough non-ascii that it should always end up b encoded.
4150        m['Subject'] = Header('žluťoučký kůň')
4151        s = BytesIO()
4152        g = email.generator.BytesGenerator(s)
4153        g.flatten(m, linesep='\r\n')
4154        self.assertEqual(
4155            s.getvalue(),
4156            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
4157
4158    def test_generator_b_encoding_linesep(self):
4159        # Since this broke in ByteGenerator, test Generator for completeness.
4160        m = Message()
4161        # This has enough non-ascii that it should always end up b encoded.
4162        m['Subject'] = Header('žluťoučký kůň')
4163        s = StringIO()
4164        g = email.generator.Generator(s)
4165        g.flatten(m, linesep='\r\n')
4166        self.assertEqual(
4167            s.getvalue(),
4168            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
4169
    # unittest setting: None removes the length limit on the diffs shown
    # when an assertion fails.
    maxDiff = None
4171
4172
class BaseTestBytesGeneratorIdempotent:
    # Helpers for round-trip tests through BytesGenerator.  The methods
    # read linesep, blinesep and normalize_linesep_regex from self; this
    # class does not define them.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the sample file as bytes, rewrite its line endings to
        # blinesep, and return the parsed message together with those bytes.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flattening msg with header wrapping disabled must give back data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
4189
4190
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    # Run the idempotency tests with '\n' line endings: every '\r\n' in the
    # sample data is rewritten to b'\n' before parsing.
    linesep = '\n'
    blinesep = b'\n'
    normalize_linesep_regex = re.compile(br'\r\n')
4196
4197
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Run the idempotency tests with '\r\n' line endings: every '\n' not
    # already preceded by '\r' is rewritten to b'\r\n' before parsing.
    linesep = '\r\n'
    blinesep = b'\r\n'
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
4203
4204
class TestBase64(unittest.TestCase):
    def test_len(self):
        self.assertEqual(base64mime.header_length('hello'),
                         len(base64mime.body_encode(b'hello', eol='')))
        # Expected header_length() for inputs of 0 through 14 characters:
        # four output characters for every started group of three.
        expected_lengths = (
            [0] + [4] * 3 + [8] * 3 + [12] * 3 + [16] * 3 + [20] * 2)
        for size, bsize in enumerate(expected_lengths):
            self.assertEqual(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        decode = base64mime.decode
        self.assertEqual(decode(''), b'')
        self.assertEqual(decode('aGVsbG8='), b'hello')

    def test_encode(self):
        encode = base64mime.body_encode
        self.assertEqual(encode(b''), b'')
        self.assertEqual(encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        self.assertEqual(encode(b'hello\n'), 'aGVsbG8K\n')
        # The same four output lines are expected for both calls below;
        # only the line terminator differs.
        encoded_lines = [
            'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
            'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
            'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
            'eHh4eCB4eHh4IA==',
        ]
        # Test the maxlinelen arg
        self.assertEqual(encode(b'xxxx ' * 20, maxlinelen=40),
                         ''.join(line + '\n' for line in encoded_lines))
        # Test the eol argument
        self.assertEqual(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
                         ''.join(line + '\r\n' for line in encoded_lines))

    def test_header_encode(self):
        he = base64mime.header_encode
        self.assertEqual(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        self.assertEqual(he('hello\r\nworld'),
                         '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        self.assertEqual(he('hello\nworld'),
                         '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        self.assertEqual(he('hello', charset='iso-8859-2'),
                         '=?iso-8859-2?b?aGVsbG8=?=')
        self.assertEqual(he('hello\nworld'),
                         '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
4255
4256
4257
4258class TestQuopri(unittest.TestCase):
4259    def setUp(self):
4260        # Set of characters (as byte integers) that don't need to be encoded
4261        # in headers.
4262        self.hlit = list(chain(
4263            range(ord('a'), ord('z') + 1),
4264            range(ord('A'), ord('Z') + 1),
4265            range(ord('0'), ord('9') + 1),
4266            (c for c in b'!*+-/')))
4267        # Set of characters (as byte integers) that do need to be encoded in
4268        # headers.
4269        self.hnon = [c for c in range(256) if c not in self.hlit]
4270        assert len(self.hlit) + len(self.hnon) == 256
4271        # Set of characters (as byte integers) that don't need to be encoded
4272        # in bodies.
4273        self.blit = list(range(ord(' '), ord('~') + 1))
4274        self.blit.append(ord('\t'))
4275        self.blit.remove(ord('='))
4276        # Set of characters (as byte integers) that do need to be encoded in
4277        # bodies.
4278        self.bnon = [c for c in range(256) if c not in self.blit]
4279        assert len(self.blit) + len(self.bnon) == 256
4280
4281    def test_quopri_header_check(self):
4282        for c in self.hlit:
4283            self.assertFalse(quoprimime.header_check(c),
4284                        'Should not be header quopri encoded: %s' % chr(c))
4285        for c in self.hnon:
4286            self.assertTrue(quoprimime.header_check(c),
4287                            'Should be header quopri encoded: %s' % chr(c))
4288
4289    def test_quopri_body_check(self):
4290        for c in self.blit:
4291            self.assertFalse(quoprimime.body_check(c),
4292                        'Should not be body quopri encoded: %s' % chr(c))
4293        for c in self.bnon:
4294            self.assertTrue(quoprimime.body_check(c),
4295                            'Should be body quopri encoded: %s' % chr(c))
4296
4297    def test_header_quopri_len(self):
4298        eq = self.assertEqual
4299        eq(quoprimime.header_length(b'hello'), 5)
4300        # RFC 2047 chrome is not included in header_length().
4301        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
4302           quoprimime.header_length(b'hello') +
4303           # =?xxx?q?...?= means 10 extra characters
4304           10)
4305        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
4306        # RFC 2047 chrome is not included in header_length().
4307        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
4308           quoprimime.header_length(b'h@e@l@l@o@') +
4309           # =?xxx?q?...?= means 10 extra characters
4310           10)
4311        for c in self.hlit:
4312            eq(quoprimime.header_length(bytes([c])), 1,
4313               'expected length 1 for %r' % chr(c))
4314        for c in self.hnon:
4315            # Space is special; it's encoded to _
4316            if c == ord(' '):
4317                continue
4318            eq(quoprimime.header_length(bytes([c])), 3,
4319               'expected length 3 for %r' % chr(c))
4320        eq(quoprimime.header_length(b' '), 1)
4321
4322    def test_body_quopri_len(self):
4323        eq = self.assertEqual
4324        for c in self.blit:
4325            eq(quoprimime.body_length(bytes([c])), 1)
4326        for c in self.bnon:
4327            eq(quoprimime.body_length(bytes([c])), 3)
4328
4329    def test_quote_unquote_idempotent(self):
4330        for x in range(256):
4331            c = chr(x)
4332            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
4333
4334    def _test_header_encode(self, header, expected_encoded_header, charset=None):
4335        if charset is None:
4336            encoded_header = quoprimime.header_encode(header)
4337        else:
4338            encoded_header = quoprimime.header_encode(header, charset)
4339        self.assertEqual(encoded_header, expected_encoded_header)
4340
4341    def test_header_encode_null(self):
4342        self._test_header_encode(b'', '')
4343
4344    def test_header_encode_one_word(self):
4345        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
4346
4347    def test_header_encode_two_lines(self):
4348        self._test_header_encode(b'hello\nworld',
4349                                '=?iso-8859-1?q?hello=0Aworld?=')
4350
4351    def test_header_encode_non_ascii(self):
4352        self._test_header_encode(b'hello\xc7there',
4353                                '=?iso-8859-1?q?hello=C7there?=')
4354
4355    def test_header_encode_alt_charset(self):
4356        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
4357                charset='iso-8859-2')
4358
4359    def _test_header_decode(self, encoded_header, expected_decoded_header):
4360        decoded_header = quoprimime.header_decode(encoded_header)
4361        self.assertEqual(decoded_header, expected_decoded_header)
4362
4363    def test_header_decode_null(self):
4364        self._test_header_decode('', '')
4365
4366    def test_header_decode_one_word(self):
4367        self._test_header_decode('hello', 'hello')
4368
4369    def test_header_decode_two_lines(self):
4370        self._test_header_decode('hello=0Aworld', 'hello\nworld')
4371
4372    def test_header_decode_non_ascii(self):
4373        self._test_header_decode('hello=C7there', 'hello\xc7there')
4374
4375    def test_header_decode_re_bug_18380(self):
4376        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
4377        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)
4378
4379    def _test_decode(self, encoded, expected_decoded, eol=None):
4380        if eol is None:
4381            decoded = quoprimime.decode(encoded)
4382        else:
4383            decoded = quoprimime.decode(encoded, eol=eol)
4384        self.assertEqual(decoded, expected_decoded)
4385
4386    def test_decode_null_word(self):
4387        self._test_decode('', '')
4388
4389    def test_decode_null_line_null_word(self):
4390        self._test_decode('\r\n', '\n')
4391
4392    def test_decode_one_word(self):
4393        self._test_decode('hello', 'hello')
4394
4395    def test_decode_one_word_eol(self):
4396        self._test_decode('hello', 'hello', eol='X')
4397
4398    def test_decode_one_line(self):
4399        self._test_decode('hello\r\n', 'hello\n')
4400
4401    def test_decode_one_line_lf(self):
4402        self._test_decode('hello\n', 'hello\n')
4403
4404    def test_decode_one_line_cr(self):
4405        self._test_decode('hello\r', 'hello\n')
4406
4407    def test_decode_one_line_nl(self):
4408        self._test_decode('hello\n', 'helloX', eol='X')
4409
4410    def test_decode_one_line_crnl(self):
4411        self._test_decode('hello\r\n', 'helloX', eol='X')
4412
4413    def test_decode_one_line_one_word(self):
4414        self._test_decode('hello\r\nworld', 'hello\nworld')
4415
4416    def test_decode_one_line_one_word_eol(self):
4417        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
4418
4419    def test_decode_two_lines(self):
4420        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
4421
4422    def test_decode_two_lines_eol(self):
4423        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
4424
4425    def test_decode_one_long_line(self):
4426        self._test_decode('Spam' * 250, 'Spam' * 250)
4427
4428    def test_decode_one_space(self):
4429        self._test_decode(' ', '')
4430
4431    def test_decode_multiple_spaces(self):
4432        self._test_decode(' ' * 5, '')
4433
4434    def test_decode_one_line_trailing_spaces(self):
4435        self._test_decode('hello    \r\n', 'hello\n')
4436
4437    def test_decode_two_lines_trailing_spaces(self):
4438        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')
4439
4440    def test_decode_quoted_word(self):
4441        self._test_decode('=22quoted=20words=22', '"quoted words"')
4442
4443    def test_decode_uppercase_quoting(self):
4444        self._test_decode('ab=CD=EF', 'ab\xcd\xef')
4445
4446    def test_decode_lowercase_quoting(self):
4447        self._test_decode('ab=cd=ef', 'ab\xcd\xef')
4448
4449    def test_decode_soft_line_break(self):
4450        self._test_decode('soft line=\r\nbreak', 'soft linebreak')
4451
4452    def test_decode_false_quoting(self):
4453        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
4454
4455    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
4456        kwargs = {}
4457        if maxlinelen is None:
4458            # Use body_encode's default.
4459            maxlinelen = 76
4460        else:
4461            kwargs['maxlinelen'] = maxlinelen
4462        if eol is None:
4463            # Use body_encode's default.
4464            eol = '\n'
4465        else:
4466            kwargs['eol'] = eol
4467        encoded_body = quoprimime.body_encode(body, **kwargs)
4468        self.assertEqual(encoded_body, expected_encoded_body)
4469        if eol == '\n' or eol == '\r\n':
4470            # We know how to split the result back into lines, so maxlinelen
4471            # can be checked.
4472            for line in encoded_body.splitlines():
4473                self.assertLessEqual(len(line), maxlinelen)
4474
4475    def test_encode_null(self):
4476        self._test_encode('', '')
4477
4478    def test_encode_null_lines(self):
4479        self._test_encode('\n\n', '\n\n')
4480
4481    def test_encode_one_line(self):
4482        self._test_encode('hello\n', 'hello\n')
4483
4484    def test_encode_one_line_crlf(self):
4485        self._test_encode('hello\r\n', 'hello\n')
4486
4487    def test_encode_one_line_eol(self):
4488        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
4489
4490    def test_encode_one_line_eol_after_non_ascii(self):
4491        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
4492        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
4493                          'hello=CF=85\r\n', eol='\r\n')
4494
4495    def test_encode_one_space(self):
4496        self._test_encode(' ', '=20')
4497
4498    def test_encode_one_line_one_space(self):
4499        self._test_encode(' \n', '=20\n')
4500
# XXX: body_encode() expects strings, but uses ord(char) on the characters of
# those strings to index into a 256-entry list.  For code points above 255
# this will fail.  Should there be a check for 8-bit-only ord() values in
# body, or at least a comment about the expected input?
4505
4506    def test_encode_two_lines_one_space(self):
4507        self._test_encode(' \n \n', '=20\n=20\n')
4508
4509    def test_encode_one_word_trailing_spaces(self):
4510        self._test_encode('hello   ', 'hello  =20')
4511
4512    def test_encode_one_line_trailing_spaces(self):
4513        self._test_encode('hello   \n', 'hello  =20\n')
4514
4515    def test_encode_one_word_trailing_tab(self):
4516        self._test_encode('hello  \t', 'hello  =09')
4517
4518    def test_encode_one_line_trailing_tab(self):
4519        self._test_encode('hello  \t\n', 'hello  =09\n')
4520
4521    def test_encode_trailing_space_before_maxlinelen(self):
4522        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4523
4524    def test_encode_trailing_space_at_maxlinelen(self):
4525        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4526
4527    def test_encode_trailing_space_beyond_maxlinelen(self):
4528        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4529
4530    def test_encode_whitespace_lines(self):
4531        self._test_encode(' \n' * 5, '=20\n' * 5)
4532
4533    def test_encode_quoted_equals(self):
4534        self._test_encode('a = b', 'a =3D b')
4535
4536    def test_encode_one_long_string(self):
4537        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4538
4539    def test_encode_one_long_line(self):
4540        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4541
4542    def test_encode_one_very_long_line(self):
4543        self._test_encode('x' * 200 + '\n',
4544                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4545
4546    def test_encode_shortest_maxlinelen(self):
4547        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
4548
4549    def test_encode_maxlinelen_too_small(self):
4550        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4551
4552    def test_encode(self):
4553        eq = self.assertEqual
4554        eq(quoprimime.body_encode(''), '')
4555        eq(quoprimime.body_encode('hello'), 'hello')
4556        # Test the binary flag
4557        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
4558        # Test the maxlinelen arg
4559        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
4560xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4561 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4562x xxxx xxxx xxxx xxxx=20""")
4563        # Test the eol argument
4564        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4565           """\
4566xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4567 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4568x xxxx xxxx xxxx xxxx=20""")
4569        eq(quoprimime.body_encode("""\
4570one line
4571
4572two line"""), """\
4573one line
4574
4575two line""")
4576
4577
4578
4579# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; drop it so it
        # cannot leak into other tests.  Nothing happens if it was never added.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        check('hello world', ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec.
        # Only construction is exercised until the disabled checks below
        # are revived.
        euc_jp = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        check('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        # A non-ASCII charset name must be rejected.
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4634
4635
4636
4637# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        check = self.ndiffAssertEqual
        hdr = Header('Hello World!')
        check(hdr.encode(), 'Hello World!')
        # The appended chunk brings its own leading space; the expected
        # result therefore has two spaces between the chunks.
        hdr.append(' Goodbye World!')
        check(hdr.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        check = self.ndiffAssertEqual
        hdr = Header('Hello World!')
        check(hdr.encode(), 'Hello World!')
        # No leading space on the appended chunk, yet one space is expected
        # between the chunks.
        hdr.append('Goodbye World!')
        check(hdr.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        plain = 'no decoding needed'
        self.assertEqual(decode_header(plain), [(plain, None)])

    def test_long(self):
        limit = 76
        hdr = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                     maxlinelen=limit)
        folded = hdr.encode(splitchars=' ')
        # Every folded line must respect the requested maximum length.
        for line in folded.split('\n '):
            self.assertLessEqual(len(line), limit)

    def test_multilingual(self):
        check = self.ndiffAssertEqual
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # One header built from three chunks, each in its own charset.
        hdr = Header(g_head, latin1)
        hdr.append(cz_head, latin2)
        hdr.append(utf8_head, utf8)
        enc = hdr.encode(maxlinelen=76)
        check(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must hand back the three original chunks.
        decoded = decode_header(enc)
        check(len(decoded), 3)
        check(decoded[0], (g_head, 'iso-8859-1'))
        check(decoded[1], (cz_head, 'iso-8859-2'))
        check(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(hdr)
        check(ustr,
              (b'Die Mieter treten hier ein werden mit einem Foerderband '
               b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
               b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
               b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
               b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
               b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
               b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
               b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
               b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
               b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
               b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
               b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
               b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
               b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
               b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
               b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
               ).decode('utf-8'))
        # Test make_header()
        rebuilt = make_header(decode_header(enc))
        check(rebuilt, hdr)

    def test_empty_header_encode(self):
        empty = Header()
        self.assertEqual(empty.encode(), '')

    def test_header_ctor_default_args(self):
        check = self.ndiffAssertEqual
        hdr = Header()
        check(hdr, '')
        hdr.append('foo', Charset('iso-8859-1'))
        check(hdr, 'foo')

    def test_explicit_maxlinelen(self):
        check = self.ndiffAssertEqual
        text = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Default settings, no header name.
        hdr = Header(text)
        check(hdr.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        check(str(hdr), text)
        # Naming the header moves the first fold one word earlier.
        hdr = Header(text, header_name='Subject')
        check(hdr.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        check(str(hdr), text)
        # A generous explicit maxlinelen leaves the value unfolded.
        hdr = Header(text, maxlinelen=1024, header_name='Subject')
        check(hdr.encode(), text)
        check(str(hdr), text)

    def test_quopri_splittable(self):
        check = self.ndiffAssertEqual
        text = 'xxxx ' * 20
        hdr = Header(charset='iso-8859-1', maxlinelen=20)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # The folded form must decode back to the original text.
        check(text, str(make_header(decode_header(encoded))))
        hdr = Header(charset='iso-8859-1', maxlinelen=40)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_base64_splittable(self):
        check = self.ndiffAssertEqual
        text = 'xxxx ' * 20
        hdr = Header(charset='koi8-r', maxlinelen=20)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        # The folded form must decode back to the original text.
        check(text, str(make_header(decode_header(encoded))))
        hdr = Header(charset='koi8-r', maxlinelen=40)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_us_ascii_header(self):
        text = 'hello'
        parts = decode_header(text)
        self.assertEqual(parts, [('hello', None)])
        rebuilt = make_header(parts)
        self.assertEqual(text, rebuilt.encode())

    def test_string_charset(self):
        # The charset may be given by name instead of as a Charset instance.
        hdr = Header()
        hdr.append('hello', 'iso-8859-1')
        self.assertEqual(hdr, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # Per the expected values, the first header comes out as 'q' and
        # the second as 'b'.
        hdr = Header('p\xf6stal', 'utf-8')
        self.assertEqual(hdr.encode(), '=?utf-8?q?p=C3=B6stal?=')
        hdr = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        self.assertEqual(hdr.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Without an error handler the stray byte must be rejected.
        with self.assertRaises(UnicodeError):
            Header(raw)
        hdr = Header()
        with self.assertRaises(UnicodeError):
            hdr.append(raw)
        # With errors='replace' it is swapped for the replacement character.
        replaced = raw.decode('utf-8', 'replace')
        self.assertEqual(str(Header(raw, errors='replace')), replaced)
        hdr.append(raw, errors='replace')
        self.assertEqual(str(hdr), replaced)

    def test_escaped_8bit_header(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        escaped = raw.decode('ascii', 'surrogateescape')
        hdr = Header(escaped, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(hdr),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(hdr),
                         [(raw, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        hdr = Header(raw, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(hdr),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(hdr),
                         [(raw, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        hdr = Header(raw, charset=email.charset.UNKNOWN8BIT)
        rebuilt = email.header.make_header(email.header.decode_header(hdr))
        self.assertEqual(
            str(rebuilt),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(rebuilt),
                         [(raw, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        hdr = Header('test')
        chunks = email.header.decode_header(hdr)
        # Mutating the returned list must not affect the header itself.
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(hdr), 'test')

    def test_encoded_adjacent_nonencoded(self):
        hdr = Header()
        hdr.append('hello', 'iso-8859-1')
        hdr.append('world')
        encoded = hdr.encode()
        self.assertEqual(encoded, '=?iso-8859-1?q?hello?= world')
        # A decode/rebuild round trip must reproduce the same encoding.
        hdr = make_header(decode_header(encoded))
        self.assertEqual(hdr.encode(), encoded)

    def test_whitespace_keeper(self):
        source = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(source)
        self.assertEqual(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        hdr = make_header(parts)
        self.assertEqual(
            hdr.encode(),
            'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        source = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        with self.assertRaises(errors.HeaderParseError):
            decode_header(source)

    def test_shift_jis_charset(self):
        # Per the expected value, shift_jis input is emitted as iso-2022-jp.
        hdr = Header('文', charset='shift_jis')
        self.assertEqual(hdr.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        parsed = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(parsed), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        msg['SomeHeader'] = '   value with leading ws'
        flattened = str(msg)
        self.assertEqual(flattened, "SomeHeader:    value with leading ws\n\n")

    def test_whitespace_header(self):
        blank = Header(' ')
        self.assertEqual(blank.encode(), ' ')
4992
4993
4994
4995# Test RFC 2231 header parameters (en/de)coding
4996class TestRFC2231(TestEmailBase):
4997
4998    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
4999    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5000    def test_get_param(self):
5001        eq = self.assertEqual
5002        msg = self._msgobj('msg_29.txt')
5003        eq(msg.get_param('title'),
5004           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5005        eq(msg.get_param('title', unquote=False),
5006           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
5007
5008    def test_set_param(self):
5009        eq = self.ndiffAssertEqual
5010        msg = Message()
5011        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5012                      charset='us-ascii')
5013        eq(msg.get_param('title'),
5014           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
5015        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5016                      charset='us-ascii', language='en')
5017        eq(msg.get_param('title'),
5018           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5019        msg = self._msgobj('msg_01.txt')
5020        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5021                      charset='us-ascii', language='en')
5022        eq(msg.as_string(maxheaderlen=78), """\
5023Return-Path: <bbb@zzz.org>
5024Delivered-To: bbb@zzz.org
5025Received: by mail.zzz.org (Postfix, from userid 889)
5026\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5027MIME-Version: 1.0
5028Content-Transfer-Encoding: 7bit
5029Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5030From: bbb@ddd.com (John X. Doe)
5031To: bbb@zzz.org
5032Subject: This is a test message
5033Date: Fri, 4 May 2001 14:05:44 -0400
5034Content-Type: text/plain; charset=us-ascii;
5035 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5036
5037
5038Hi,
5039
5040Do you like this message?
5041
5042-Me
5043""")
5044
5045    def test_set_param_requote(self):
5046        msg = Message()
5047        msg.set_param('title', 'foo')
5048        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
5049        msg.set_param('title', 'bar', requote=False)
5050        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
5051        # tspecial is still quoted.
5052        msg.set_param('title', "(bar)bell", requote=False)
5053        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
5054
5055    def test_del_param(self):
5056        eq = self.ndiffAssertEqual
5057        msg = self._msgobj('msg_01.txt')
5058        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
5059        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5060            charset='us-ascii', language='en')
5061        msg.del_param('foo', header='Content-Type')
5062        eq(msg.as_string(maxheaderlen=78), """\
5063Return-Path: <bbb@zzz.org>
5064Delivered-To: bbb@zzz.org
5065Received: by mail.zzz.org (Postfix, from userid 889)
5066\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5067MIME-Version: 1.0
5068Content-Transfer-Encoding: 7bit
5069Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5070From: bbb@ddd.com (John X. Doe)
5071To: bbb@zzz.org
5072Subject: This is a test message
5073Date: Fri, 4 May 2001 14:05:44 -0400
5074Content-Type: text/plain; charset="us-ascii";
5075 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5076
5077
5078Hi,
5079
5080Do you like this message?
5081
5082-Me
5083""")
5084
5085    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
5086    # I changed the charset name, though, because the one in the file isn't
5087    # a legal charset name.  Should add a test for an illegal charset.
5088    def test_rfc2231_get_content_charset(self):
5089        eq = self.assertEqual
5090        msg = self._msgobj('msg_32.txt')
5091        eq(msg.get_content_charset(), 'us-ascii')
5092
5093    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
5094    def test_rfc2231_parse_rfc_quoting(self):
5095        m = textwrap.dedent('''\
5096            Content-Disposition: inline;
5097            \tfilename*0*=''This%20is%20even%20more%20;
5098            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
5099            \tfilename*2="is it not.pdf"
5100
5101            ''')
5102        msg = email.message_from_string(m)
5103        self.assertEqual(msg.get_filename(),
5104                         'This is even more ***fun*** is it not.pdf')
5105        self.assertEqual(m, msg.as_string())
5106
5107    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5108    def test_rfc2231_parse_extra_quoting(self):
5109        m = textwrap.dedent('''\
5110            Content-Disposition: inline;
5111            \tfilename*0*="''This%20is%20even%20more%20";
5112            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5113            \tfilename*2="is it not.pdf"
5114
5115            ''')
5116        msg = email.message_from_string(m)
5117        self.assertEqual(msg.get_filename(),
5118                         'This is even more ***fun*** is it not.pdf')
5119        self.assertEqual(m, msg.as_string())
5120
5121    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
5122    # but new test uses *0* because otherwise lang/charset is not valid.
5123    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
5124    def test_rfc2231_no_language_or_charset(self):
5125        m = '''\
5126Content-Transfer-Encoding: 8bit
5127Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
5128Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
5129
5130'''
5131        msg = email.message_from_string(m)
5132        param = msg.get_param('NAME')
5133        self.assertNotIsInstance(param, tuple)
5134        self.assertEqual(
5135            param,
5136            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
5137
5138    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
5139    def test_rfc2231_no_language_or_charset_in_filename(self):
5140        m = '''\
5141Content-Disposition: inline;
5142\tfilename*0*="''This%20is%20even%20more%20";
5143\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5144\tfilename*2="is it not.pdf"
5145
5146'''
5147        msg = email.message_from_string(m)
5148        self.assertEqual(msg.get_filename(),
5149                         'This is even more ***fun*** is it not.pdf')
5150
5151    # Duplicate of previous test?
5152    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
5153        m = '''\
5154Content-Disposition: inline;
5155\tfilename*0*="''This%20is%20even%20more%20";
5156\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5157\tfilename*2="is it not.pdf"
5158
5159'''
5160        msg = email.message_from_string(m)
5161        self.assertEqual(msg.get_filename(),
5162                         'This is even more ***fun*** is it not.pdf')
5163
5164    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
5165    # but the test below is wrong (the first part should be decoded).
5166    def test_rfc2231_partly_encoded(self):
5167        m = '''\
5168Content-Disposition: inline;
5169\tfilename*0="''This%20is%20even%20more%20";
5170\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5171\tfilename*2="is it not.pdf"
5172
5173'''
5174        msg = email.message_from_string(m)
5175        self.assertEqual(
5176            msg.get_filename(),
5177            'This%20is%20even%20more%20***fun*** is it not.pdf')
5178
5179    def test_rfc2231_partly_nonencoded(self):
5180        m = '''\
5181Content-Disposition: inline;
5182\tfilename*0="This%20is%20even%20more%20";
5183\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
5184\tfilename*2="is it not.pdf"
5185
5186'''
5187        msg = email.message_from_string(m)
5188        self.assertEqual(
5189            msg.get_filename(),
5190            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
5191
5192    def test_rfc2231_no_language_or_charset_in_boundary(self):
5193        m = '''\
5194Content-Type: multipart/alternative;
5195\tboundary*0*="''This%20is%20even%20more%20";
5196\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
5197\tboundary*2="is it not.pdf"
5198
5199'''
5200        msg = email.message_from_string(m)
5201        self.assertEqual(msg.get_boundary(),
5202                         'This is even more ***fun*** is it not.pdf')
5203
5204    def test_rfc2231_no_language_or_charset_in_charset(self):
5205        # This is a nonsensical charset value, but tests the code anyway
5206        m = '''\
5207Content-Type: text/plain;
5208\tcharset*0*="This%20is%20even%20more%20";
5209\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
5210\tcharset*2="is it not.pdf"
5211
5212'''
5213        msg = email.message_from_string(m)
5214        self.assertEqual(msg.get_content_charset(),
5215                         'this is even more ***fun*** is it not.pdf')
5216
5217    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
5218    def test_rfc2231_bad_encoding_in_filename(self):
5219        m = '''\
5220Content-Disposition: inline;
5221\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
5222\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5223\tfilename*2="is it not.pdf"
5224
5225'''
5226        msg = email.message_from_string(m)
5227        self.assertEqual(msg.get_filename(),
5228                         'This is even more ***fun*** is it not.pdf')
5229
5230    def test_rfc2231_bad_encoding_in_charset(self):
5231        m = """\
5232Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
5233
5234"""
5235        msg = email.message_from_string(m)
5236        # This should return None because non-ascii characters in the charset
5237        # are not allowed.
5238        self.assertEqual(msg.get_content_charset(), None)
5239
5240    def test_rfc2231_bad_character_in_charset(self):
5241        m = """\
5242Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
5243
5244"""
5245        msg = email.message_from_string(m)
5246        # This should return None because non-ascii characters in the charset
5247        # are not allowed.
5248        self.assertEqual(msg.get_content_charset(), None)
5249
5250    def test_rfc2231_bad_character_in_filename(self):
5251        m = '''\
5252Content-Disposition: inline;
5253\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
5254\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5255\tfilename*2*="is it not.pdf%E2"
5256
5257'''
5258        msg = email.message_from_string(m)
5259        self.assertEqual(msg.get_filename(),
5260                         'This is even more ***fun*** is it not.pdf\ufffd')
5261
5262    def test_rfc2231_unknown_encoding(self):
5263        m = """\
5264Content-Transfer-Encoding: 8bit
5265Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
5266
5267"""
5268        msg = email.message_from_string(m)
5269        self.assertEqual(msg.get_filename(), 'myfile.txt')
5270
5271    def test_rfc2231_single_tick_in_filename_extended(self):
5272        eq = self.assertEqual
5273        m = """\
5274Content-Type: application/x-foo;
5275\tname*0*=\"Frank's\"; name*1*=\" Document\"
5276
5277"""
5278        msg = email.message_from_string(m)
5279        charset, language, s = msg.get_param('name')
5280        eq(charset, None)
5281        eq(language, None)
5282        eq(s, "Frank's Document")
5283
5284    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5285    def test_rfc2231_single_tick_in_filename(self):
5286        m = """\
5287Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
5288
5289"""
5290        msg = email.message_from_string(m)
5291        param = msg.get_param('name')
5292        self.assertNotIsInstance(param, tuple)
5293        self.assertEqual(param, "Frank's Document")
5294
5295    def test_rfc2231_missing_tick(self):
5296        m = '''\
5297Content-Disposition: inline;
5298\tfilename*0*="'This%20is%20broken";
5299'''
5300        msg = email.message_from_string(m)
5301        self.assertEqual(
5302            msg.get_filename(),
5303            "'This is broken")
5304
5305    def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
5306        m = '''\
5307Content-Disposition: inline;
5308\tfilename*0*="'This%20is%E2broken";
5309'''
5310        msg = email.message_from_string(m)
5311        self.assertEqual(
5312            msg.get_filename(),
5313            "'This is\ufffdbroken")
5314
5315    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
5316    def test_rfc2231_tick_attack_extended(self):
5317        eq = self.assertEqual
5318        m = """\
5319Content-Type: application/x-foo;
5320\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
5321
5322"""
5323        msg = email.message_from_string(m)
5324        charset, language, s = msg.get_param('name')
5325        eq(charset, 'us-ascii')
5326        eq(language, 'en-us')
5327        eq(s, "Frank's Document")
5328
5329    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
5330    def test_rfc2231_tick_attack(self):
5331        m = """\
5332Content-Type: application/x-foo;
5333\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
5334
5335"""
5336        msg = email.message_from_string(m)
5337        param = msg.get_param('name')
5338        self.assertNotIsInstance(param, tuple)
5339        self.assertEqual(param, "us-ascii'en-us'Frank's Document")
5340
5341    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
5342    def test_rfc2231_no_extended_values(self):
5343        eq = self.assertEqual
5344        m = """\
5345Content-Type: application/x-foo; name=\"Frank's Document\"
5346
5347"""
5348        msg = email.message_from_string(m)
5349        eq(msg.get_param('name'), "Frank's Document")
5350
5351    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
5352    def test_rfc2231_encoded_then_unencoded_segments(self):
5353        eq = self.assertEqual
5354        m = """\
5355Content-Type: application/x-foo;
5356\tname*0*=\"us-ascii'en-us'My\";
5357\tname*1=\" Document\";
5358\tname*2*=\" For You\"
5359
5360"""
5361        msg = email.message_from_string(m)
5362        charset, language, s = msg.get_param('name')
5363        eq(charset, 'us-ascii')
5364        eq(language, 'en-us')
5365        eq(s, 'My Document For You')
5366
5367    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
5368    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
5369    def test_rfc2231_unencoded_then_encoded_segments(self):
5370        eq = self.assertEqual
5371        m = """\
5372Content-Type: application/x-foo;
5373\tname*0=\"us-ascii'en-us'My\";
5374\tname*1*=\" Document\";
5375\tname*2*=\" For You\"
5376
5377"""
5378        msg = email.message_from_string(m)
5379        charset, language, s = msg.get_param('name')
5380        eq(charset, 'us-ascii')
5381        eq(language, 'en-us')
5382        eq(s, 'My Document For You')
5383
5384    def test_should_not_hang_on_invalid_ew_messages(self):
5385        messages = ["""From: user@host.com
5386To: user@host.com
5387Bad-Header:
5388 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
5389 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
5390 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
5391
5392Hello!
5393""", """From: ����� �������� <xxx@xxx>
5394To: "xxx" <xxx@xxx>
5395Subject:   ��� ���������� ����� ����� � ��������� �� ����
5396MIME-Version: 1.0
5397Content-Type: text/plain; charset="windows-1251";
5398Content-Transfer-Encoding: 8bit
5399
5400�� ����� � ���� ������ ��� ��������
5401"""]
5402        for m in messages:
5403            with self.subTest(m=m):
5404                msg = email.message_from_string(m)
5405
5406
5407# Tests to ensure that signed parts of an email are completely preserved, as
5408# required by RFC1847 section 2.1.  Note that these are incomplete, because the
5409# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Return a (raw text, parsed message) pair for the data file
        # *filename*, which is opened through the openfile() helper.
        with openfile(filename) as fp:
            original = fp.read()
            msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the two to
        # be identical.  The pattern spans from a line starting with
        # "--<boundary>" to the next line that is exactly "--<boundary>";
        # group 2 is the text in between.
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # Fail with a readable message instead of an AttributeError on None
        # when either text contains no delimited part.
        self.assertIsNotNone(inmatch, 'no MIME part found in original text')
        self.assertIsNotNone(outmatch, 'no MIME part found in result text')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        # Round-trip through Message.as_string() with default settings.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Round-trip through Message.as_string() with maxheaderlen=60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Round-trip through an explicit Generator writing to a StringIO.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5442
5443
5444
if __name__ == '__main__':
    # Run this module's tests when the file is executed as a script.
    unittest.main()
5447